Compare commits

...

83 Commits

Author SHA1 Message Date
8f65605845 TMP: remove optimization where later ranking rules are not applied on buckets of a single document 2023-05-30 11:12:28 +02:00
0a7817a002 Merge #3786
3786: Consistently use wrapping add to avoid overflow in debug when query s… r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3785

## What does this PR do?
- Some code paths erroneously used the default addition operator, whose semantics are "overflow is an error, checked at runtime in debug builds", instead of the intended "overflow is expected" semantics this code relies on (it uses `u16::MAX` as a sentinel). This PR makes sure the wrapping add operator is used everywhere (see the sketch below).
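
For illustration, here is a minimal standalone sketch (not the Meilisearch code itself) of the difference between the two operators when `u16::MAX` is used as a sentinel:

```rust
fn main() {
    // `u16::MAX` plays the role of the sentinel value mentioned above.
    let sentinel: u16 = u16::MAX;

    // With the default operator, this would panic in a debug build with
    // "attempt to add with overflow":
    // let _next = sentinel + 1;

    // The intended "overflow is expected" semantics: wrap around explicitly.
    let next = sentinel.wrapping_add(1);
    assert_eq!(next, 0);
}
```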

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-29 12:39:54 +00:00
1dfc4038ab Add test that fails before PR and passes now 2023-05-29 11:58:26 +02:00
73198179f1 Consistently use wrapping add to avoid overflow in debug when query starts with a separator 2023-05-29 11:54:12 +02:00
087866d59f Merge #3775
3775: Last error code changes on the new get/delete documents routes r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes #3774

## What does this PR do?
Following the specification: https://github.com/meilisearch/specifications/pull/236

1. Get rid of the `invalid_document_delete_filter` and always use the `invalid_document_filter`
2. Introduce a new `missing_document_filter` instead of returning `invalid_document_delete_filter` (that’s consistent with all the other routes that have a mandatory parameter)
3. Always return the `original_filter` in the details (potentially set to `null`) instead of hiding it if it wasn’t used


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-24 10:07:41 +00:00
9111f5176f get rid of the invalid document delete filter in favor of the invalid document filter 2023-05-24 11:53:16 +02:00
b9dd092a62 make the details return null in the originalFilter field if no filter was provided + add a big test on the details 2023-05-24 11:48:22 +02:00
ca99bc3188 implement the missing document filter error code when deleting documents 2023-05-24 11:29:20 +02:00
2e49d6aec1 Merge #3768
3768: Fix bugs in graph-based ranking rules + make `words` a graph-based ranking rule r=dureuill a=loiclec

This PR contains three changes:

## 1. Don't call the `words` ranking rule if the term matching strategy is `All`

This is because the purpose of `words` is only to remove nodes from the query graph, so it would never do any useful work when the matching strategy is `All`. Remember that the universe has already been computed from all the docids corresponding to the "maximally reduced" query graph, which, in the case of `All`, is equal to the original graph.
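
A minimal self-contained sketch of the resulting behaviour, using hypothetical enum names rather than the actual milli types:

```rust
#[derive(Debug, PartialEq)]
enum TermsMatchingStrategy { All, Last }

#[derive(Debug)]
enum RankingRule { Words, Typo, Proximity }

fn ranking_rules(strategy: TermsMatchingStrategy) -> Vec<RankingRule> {
    let mut rules = Vec::new();
    // `words` only removes nodes from the query graph; with `All`, the
    // maximally reduced graph equals the original one, so the rule would never
    // do any useful work and is skipped entirely.
    if strategy != TermsMatchingStrategy::All {
        rules.push(RankingRule::Words);
    }
    rules.push(RankingRule::Typo);
    rules.push(RankingRule::Proximity);
    rules
}

fn main() {
    println!("{:?}", ranking_rules(TermsMatchingStrategy::All));  // no `Words`
    println!("{:?}", ranking_rules(TermsMatchingStrategy::Last)); // `Words` first
}
```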

## 2. The `words` ranking rule is replaced by a graph-based ranking rule. 

This is for three reasons:

1. **performance**: graph-based ranking rules benefit from a lot of optimisations by default, which ensures that they are never too slow. The previous implementation of `words` could call `compute_query_graph_docids` many times if some words had to be removed from the query, which would be quite expensive. I was especially worried about its performance in cases where it is placed right after the `sort` ranking rule. Furthermore, `compute_query_graph_docids` would clone a lot of bitmaps many times unnecessarily.

2. **consistency**: every other ranking rule (except `sort`) is graph-based. It makes sense to implement `words` like that as well. It will automatically benefit from all the features, optimisations, and bug fixes that all the other ranking rules get.

3. **surfacing bugs**: as the first ranking rule to be called (most of the time), I'd like `words` to behave the same as the other ranking rules so that we can quickly detect bugs in our graph algorithms. This actually already happened, which is why this PR also contains a bug fix.

## 3. Fix the `update_all_costs_before_nodes` function

It is a bit difficult to explain what was wrong, but I'll try. The bug happened when we had graphs like:
<img width="730" alt="Screenshot 2023-05-16 at 10 58 57" src="https://github.com/meilisearch/meilisearch/assets/6040237/40db1a68-d852-4e89-99d5-0d65757242a7">
and we gave the node `is` as argument.

Then, we'd walk backwards from the node breadth-first. We'd update the costs of:
1. `sun`
2. `thesun`
3. `start`
4. `the`

which is an incorrect order. The correct order is:

1. `sun`
2. `thesun`
3. `the`
4. `start`

That is, we can only update the cost of a node when all of its successors have either already been visited or were not affected by the update to the node passed as argument. To solve this bug, I factored out the graph-traversal logic into a `traverse_breadth_first_backward` function.
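
The sketch below is a self-contained illustration of that ordering constraint, using a plain `HashMap` graph instead of the real query-graph types: affected nodes are emitted in an order where every affected successor is processed before its predecessors.

```rust
use std::collections::{HashMap, HashSet, VecDeque};

// node -> adjacent nodes, in one direction
type Graph<'a> = HashMap<&'a str, Vec<&'a str>>;

// Collect every node whose cost depends on `from`, i.e. all of its ancestors.
fn affected_nodes<'a>(predecessors: &Graph<'a>, from: &'a str) -> HashSet<&'a str> {
    let mut seen = HashSet::new();
    let mut queue = VecDeque::from([from]);
    while let Some(node) = queue.pop_front() {
        if seen.insert(node) {
            if let Some(preds) = predecessors.get(node) {
                queue.extend(preds.iter().copied());
            }
        }
    }
    seen
}

// Emit the affected nodes so that a node only appears once all of its affected
// successors have appeared: the constraint described above.
fn traverse_backward<'a>(
    successors: &Graph<'a>,
    predecessors: &Graph<'a>,
    from: &'a str,
) -> Vec<&'a str> {
    let affected = affected_nodes(predecessors, from);
    // pending[n] = number of affected successors of `n` not yet processed
    let mut pending: HashMap<&str, usize> = affected
        .iter()
        .map(|&n| {
            let count = successors
                .get(n)
                .map_or(0, |succ| succ.iter().filter(|s| affected.contains(*s)).count());
            (n, count)
        })
        .collect();
    let mut queue: VecDeque<&str> =
        affected.iter().copied().filter(|n| pending[n] == 0).collect();
    let mut order = Vec::new();
    while let Some(node) = queue.pop_front() {
        order.push(node);
        for &pred in predecessors.get(node).into_iter().flatten() {
            if let Some(count) = pending.get_mut(pred) {
                *count -= 1;
                if *count == 0 {
                    queue.push_back(pred);
                }
            }
        }
    }
    order
}

fn main() {
    // The graph from the screenshot: start → the → sun → is and start → thesun → is.
    let successors: Graph<'static> = HashMap::from([
        ("start", vec!["the", "thesun"]),
        ("the", vec!["sun"]),
        ("thesun", vec!["is"]),
        ("sun", vec!["is"]),
        ("is", vec![]),
    ]);
    let mut predecessors: Graph<'static> = HashMap::new();
    for (&node, succs) in &successors {
        for &succ in succs {
            predecessors.entry(succ).or_default().push(node);
        }
    }
    // The key property: `the` now always comes before `start`,
    // e.g. ["is", "sun", "thesun", "the", "start"].
    println!("{:?}", traverse_backward(&successors, &predecessors, "is"));
}
```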


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-23 13:28:08 +00:00
51043f78f0 Remove trailing whitespace 2023-05-23 15:27:25 +02:00
a490a11325 Add explanatory comment on the way we're recomputing costs 2023-05-23 15:24:24 +02:00
101f5a20d2 Merge #3757
3757: Adjust the cost of edges in the `position` ranking rule by bucketing positions more aggressively r=loiclec a=loiclec

This PR significantly improves the performance of the `position` ranking rule when:
1. a query contains many words
2. the `position` ranking rule needs to be called many times
3. the score of the documents according to `position` is high

These conditions greatly increase:
1. the number of edge traversals that are needed to find a valid path from the `start` node to the `end` node
2. the number of edges that need to be deleted from the graph, and therefore the number of times that we need to recompute all the possible costs from START to END

As a result, a majority of the search time is spent in `visit_condition`, `visit_node`, and `update_all_costs_before_node`. This is frustrating because it often happens when the "universe" given to the rule consists of only a handful of document ids.

By limiting the number of possible edges between two nodes from `20` to `10`, we:
1. reduce the number of possible costs from START to END
2. reduce the number of edges that will be deleted 
3. make it faster to update the costs after deleting an edge
4. reduce the number of buckets that need to be computed

In terms of relevancy, I don't think we lose or gain much. We still prefer terms that are in lower positions, with decreasing precision as we go further. The previous bucketing wasn't chosen in a principled way, and neither is this one. They both "feel" right to me.
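
For illustration, a standalone sketch of what bucketing positions more aggressively means; the thresholds below are made up, the point is only that many distinct positions collapse into at most ten costs:

```rust
// Map an absolute word position to a cost bucket. The real rule works on the
// query graph's edge conditions; this standalone function only illustrates the
// idea that precision decreases as the position grows, which caps the number
// of distinct costs (and therefore edges) between two nodes.
fn position_bucket(position: u16) -> u16 {
    match position {
        0..=3 => position, // keep full precision for the first positions
        4..=7 => 4,
        8..=15 => 5,
        16..=31 => 6,
        32..=63 => 7,
        64..=127 => 8,
        _ => 9, // everything far away lands in the same bucket
    }
}

fn main() {
    // At most 10 distinct buckets, however many positions the documents contain.
    assert_eq!(position_bucket(2), 2);
    assert_eq!(position_bucket(40), 7);
    assert_eq!(position_bucket(1000), 9);
}
```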


Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
2023-05-17 11:43:59 +00:00
6ce1ce77e6 Merge #3738
3738: Add analytics on the get documents resource r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3737
Related spec https://github.com/meilisearch/specifications/pull/234

## What does this PR do?
Add the analytics for the following routes:
- `GET` - `/indexes/:uid/documents`
- `GET` - `/indexes/:uid/documents/:doc_id`
- `POST` - `/indexes/:uid/documents/fetch`

These analytics are aggregated between two events:
- `Documents Fetched GET`
- `Documents Fetched POST`

Both events share the same payload (a sketch of such an aggregate follows the table):

| Property name | Description | Example |
|---------------|-------------|---------|
| `requests.total_received` | Total number of requests received in this batch | 325 |
| `per_document_id` | `false` | false |
| `per_filter` | `true` if `POST /indexes/:indexUid/documents/fetch` endpoint was used with a filter in this batch, otherwise `false` | false |
| `pagination.max_limit` | Highest value given for the `limit` parameter in this batch | 60 |
| `pagination.max_offset` | Highest value given for the `offset` parameter in this batch | 1000 |

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-16 19:37:41 +00:00
ec8f685d84 Fix bug in cheapest path algorithm 2023-05-16 17:01:30 +02:00
5758268866 Don't compute split_words for phrases 2023-05-16 17:01:18 +02:00
4d037e6693 Merge #3759
3759: Invalid error code when parsing filters r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3753

## What does this PR do?
Fix the error code returned when the error comes from evaluating the filter on the get, fetch, and delete documents routes.


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-16 12:55:06 +00:00
96da5130a4 fix the error code in case of not filterable attributes on the get / delete documents by filter routes 2023-05-16 13:56:18 +02:00
3e19702de6 Update snapshot tests 2023-05-16 12:22:46 +02:00
1e762d151f Merge #3755
3755: Re-add final dot r=curquiza a=ManyTheFish

I removed the final dot of the error message in my last PR; this one re-adds it.

related to https://github.com/meilisearch/meilisearch/pull/3749

> Oops 😬 

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-05-16 10:10:58 +00:00
0b38f211ac test the new introduced route 2023-05-16 12:07:44 +02:00
f6524a6858 Adjust costs of edges in position ranking rule
To ensure good performance
2023-05-16 11:28:56 +02:00
65ad8cce36 Merge #3741
3741: Add ngram support to the highlighter r=ManyTheFish a=loiclec

This PR fixes a bug introduced by the search refactor, where ngrams were not highlighted. 

The solution was to add the ngrams to the vector of `LocatedQueryTerm` that is given to the `MatchingWords` structure.

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-16 09:03:31 +00:00
42650f82e8 Re-add final dot 2023-05-16 10:57:26 +02:00
a37da36766 Implement words as a graph-based ranking rule and fix some bugs 2023-05-16 10:42:11 +02:00
85d96d35a8 Highlight ngram matches as well 2023-05-16 10:39:36 +02:00
bf66e97b48 Merge #3749
3749: Fix back: sort error message r=ManyTheFish a=ManyTheFish

This PR reintroduces the error message modified in https://github.com/meilisearch/milli/pull/375.
However, this added double-quotes around `sort` in the message. I don't think any other message contains double-quotes, so I have added a separate commit replacing the double-quotes with back-ticks, which seems more consistent with the other error messages; this last change can easily be reverted.

## Detailed changes
#### v1.2-rc0
```
The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.
```
#### [Reintroduce fix (previous and expected behavior)](23d1c86825)
```
You must specify where "sort" is listed in the rankingRules setting to use the sort parameter at search time
```
#### [Replace double-quotes with back-ticks (my suggestion)](4d691d071a)
```
You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time
```

## Related

Fixes #3722

## Reviewers

- technical review: `@irevoire`
- to validate the replacement: `@macraig`

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-05-15 14:55:51 +00:00
a7ea5ec748 Merge #3651
3651: Use the writemap flag to reduce the memory usage r=irevoire a=Kerollmops

This draft PR shows some stats about the memory usage of Meilisearch when [the LMDB `MDB_WRITEMAP` flag](3947014aed/libraries/liblmdb/lmdb.h (L573-L581)) is enabled and when it is not. As you can see, there is a reduction of about 50% in the peak memory usage. The dataset used was [the Wikipedia one](https://www.notion.so/meilisearch/Wikipedia-8b1486e4b17547c5bda485d2d97767a0), restricted to the first 30 000 CSV documents, with no settings. This PR depends on https://github.com/meilisearch/heed/pull/168.

I just [opened a discussion](https://github.com/meilisearch/product/discussions/652) for people to understand the tradeoffs and give their feedback.

- [x] Create an experimental flag `--experimental-reduce-indexing-memory-usage` (see the sketch after this list).
- [x] Add it to the config file.
- [x] Explain the tradeoff and copy/link the LMDB documentation in the help message.
- [x] Add analytics about the experimental flag.
- [x] Document that this flag cannot be used on Windows, ~~or hide it~~.
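
As a rough sketch of the first checklist item, assuming clap 4 with the derive feature (the real `Opt` struct has many more fields), the flag could be exposed like this:

```rust
use clap::Parser;

/// Simplified stand-in for Meilisearch's option struct; only the experimental
/// flag discussed in this PR is shown.
#[derive(Debug, Parser)]
struct Opt {
    /// Experimental: enables the LMDB `MDB_WRITEMAP` flag to reduce the peak
    /// memory usage during indexing, at the cost of slower writes when the
    /// database is larger than the available RAM.
    /// See <https://github.com/meilisearch/product/discussions/652>.
    #[arg(long)]
    experimental_reduce_indexing_memory_usage: bool,
}

fn main() {
    let opt = Opt::parse();
    println!("reduce indexing memory usage: {}", opt.experimental_reduce_indexing_memory_usage);
}
```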

<details>
  <summary>The command I used to run the tests</summary>

#### Sign the binary to be able to use Instruments / xcrun
```sh
codesign -s - -f --entitlements ~/ent.plist target/release/meilisearch
```

where `ent.plist` contains:
```xml
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
    <dict>
        <key>com.apple.security.get-task-allow</key>
        <true/>
    </dict>
</plist>
```

#### Run Meilisearch in measure-mode
```sh
xcrun xctrace record --template 'Allocations' --launch -- target/release/meilisearch --max-indexing-memory 0MiB
```

#### Send the wiki dataset available on notion.so / Public
```sh
for f in 0.csv 15000.csv; do echo sending $f; xh 'localhost:7700/indexes/wiki/documents' 'content-type:text/csv' @$f; done
```

#### Wait for the task to finish
```sh
watch --color xh --pretty all 'localhost:7700/tasks?statuses=processing'
```
</details>

Keep in mind that I tested this with Apple's Instruments tools on an iMac 5K 2019. More benchmarks must be done, especially on indexing speed, as the flag is said to slow down writes to databases bigger than the available memory.

On the left Meilisearch is running without the flag. On the right, it is running with the flag.

<p align="center">
<img align="left" width="45%" alt="Instrument showing the memory usage of Meilisearch without the MDB_WRITEMAP flag" src="https://user-images.githubusercontent.com/3610253/234299524-7607f1df-6fc1-45d3-bd3d-4f9388002857.png">
<img align="right" width="45%" alt="Instrument showing the memory usage of Meilisearch with the MDB_WRITEMAP flag" src="https://user-images.githubusercontent.com/3610253/234299534-6cc3ae58-8bd9-426c-aa79-4c78f9e88b94.png">
</p>

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-05-15 14:10:07 +00:00
dc7ba77e57 Add the option in the config file 2023-05-15 16:07:43 +02:00
13f870e993 Fix typos and documentation issues 2023-05-15 15:11:45 +02:00
1a79fd0c3c Use the new heed v0.12.6 2023-05-15 11:42:30 +02:00
f759ec7fad Expose a flag to enable the MDB_WRITEMAP flag 2023-05-15 11:38:43 +02:00
4d691d071a Change double-quotes by back-ticks in sort error message 2023-05-15 11:10:36 +02:00
23d1c86825 Re-introduce the sort error message fix 2023-05-15 11:07:23 +02:00
c4a40e7110 Use the writemap flag to reduce the memory usage 2023-05-15 10:15:33 +02:00
e01980c6f4 Merge #3739
3739: fix: update `payload_too_large` error message to include human readable maximum acceptable payload size r=Kerollmops a=cymruu


# Pull Request

## Related issue
Fixes #3736 

## What does this PR do?
- update the `payload_too_large` error message as requested in the ticket (a formatting sketch follows)
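
A self-contained sketch of the human-readable formatting; the exact message wording and the real implementation (which may rely on a crate such as `byte-unit`) are assumptions here:

```rust
// Turn a byte limit into a human-readable string for the `payload_too_large`
// message; illustrative only.
fn human_readable(bytes: u64) -> String {
    const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
    let mut value = bytes as f64;
    let mut unit = 0;
    while value >= 1000.0 && unit < UNITS.len() - 1 {
        value /= 1000.0;
        unit += 1;
    }
    format!("{:.2} {}", value, UNITS[unit])
}

fn main() {
    let limit = 100 * 1000 * 1000; // default http_payload_size_limit = "100 MB"
    // Hypothetical message shape, not the exact Meilisearch wording:
    println!(
        "The provided payload reached the size limit. The maximum accepted payload size is {}.",
        human_readable(limit)
    );
}
```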

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
2023-05-11 09:37:19 +00:00
25209a3590 introduce remaining field in Payload 2023-05-10 20:55:18 +02:00
3064ea6495 fix: update payload_too_large error message to include human readable maximum acceptable payload size 2023-05-10 18:16:59 +02:00
46ec8a97e9 rename the analytics according to the spec 2023-05-10 14:28:30 +02:00
c42a65a297 Update meilisearch/src/analytics/segment_analytics.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-10 14:28:30 +02:00
d08f8690d2 add analytics on the get documents resource 2023-05-10 14:28:30 +02:00
ad5f25d880 Merge #3742
3742: Compute split words derivations of terms that don't accept typos r=ManyTheFish a=loiclec

Allows looking for split-word derivations of short words in the user's query (like `the -> "t he"` or `door -> do or`), as well as for 3-grams.
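
A rough standalone sketch of computing such split-word candidates, with a simple closure standing in for the index's word dictionary (an FST in milli):

```rust
// For a term like "the" or "door", try every split point and keep the pairs
// whose two halves are both known words; `word_exists` stands in for the
// index's word dictionary lookup.
fn split_word_candidates(word: &str, word_exists: impl Fn(&str) -> bool) -> Vec<(String, String)> {
    let mut candidates = Vec::new();
    for (i, _) in word.char_indices().skip(1) {
        let (left, right) = word.split_at(i);
        if word_exists(left) && word_exists(right) {
            candidates.push((left.to_string(), right.to_string()));
        }
    }
    candidates
}

fn main() {
    let dictionary = ["t", "he", "do", "or", "the", "door"];
    let exists = |w: &str| dictionary.contains(&w);
    // Matches the PR's examples: the -> [("t", "he")], door -> [("do", "or")]
    println!("{:?}", split_word_candidates("the", &exists));
    println!("{:?}", split_word_candidates("door", &exists));
}
```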

Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-10 12:12:52 +00:00
4d352a21ac Compute split words derivations of terms that don't accept typos 2023-05-10 13:31:19 +02:00
4a4210c116 Merge #3734
3734: Update version for the next release (v1.2.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-05-09 07:35:48 +00:00
3533d4f2bb Update version for the next release (v1.2.0) in Cargo.toml 2023-05-08 17:52:33 +00:00
3625389057 Highlight ngram matches as well 2023-05-08 15:35:41 +02:00
eace6df91b Merge #3726
3726: Fix prefix highlighting r=loiclec a=ManyTheFish

Prefix queries were not properly highlighted; this PR now highlights only the start of a word when it matches a prefix.
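
A tiny illustrative sketch of the intended behaviour (not the matcher's actual code): when a document word matches a query term only as a prefix, only the prefix bytes get highlighted.

```rust
// Returns the byte length of `doc_word` that should be wrapped in highlight
// tags. A prefix match (query "hel" against "hello") highlights only "hel";
// an exact match still highlights the whole word.
fn highlighted_byte_len(doc_word: &str, query_term: &str, is_prefix_match: bool) -> usize {
    if is_prefix_match {
        doc_word
            .char_indices()
            .nth(query_term.chars().count())
            .map(|(byte_idx, _)| byte_idx)
            .unwrap_or(doc_word.len())
    } else {
        doc_word.len()
    }
}

fn main() {
    assert_eq!(highlighted_byte_len("hello", "hel", true), 3);
    assert_eq!(highlighted_byte_len("hello", "hello", false), 5);
}
```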

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
2023-05-08 07:46:46 +00:00
83ab8cf4e5 Remove dbg!(..) expression in highlighter tests 2023-05-08 09:45:23 +02:00
cd2573fcc3 Fix prefix highlighting 2023-05-04 16:53:50 +02:00
9f7981df28 Merge #3687
3687: Allow to disable specialized tokenizations (again) r=Kerollmops a=jirutka

In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai` feature flags to allow Meilisearch to be built without the huge specialized tokenizations that took up 90% of the Meilisearch binary size. Unfortunately, due to some recent changes, this doesn't work anymore. The problem lies in excessive use of the `default` feature flag, which infects the dependency graph.

Instead of adding `default-features = false` here and there, it's easier and more future-proof to not declare `default` in `milli` and `meilisearch-types`. I've renamed it to `all-tokenizations`, which also makes it a bit clearer what it's about.


Co-authored-by: Jakub Jirutka <jakub@jirutka.cz>
2023-05-04 14:48:01 +00:00
e615fa5ec6 Fix unused_imports warning in milli when japanese is not enabled 2023-05-04 15:46:11 +02:00
13f1277637 Allow to disable specialized tokenizations (again)
In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai`
feature flags to allow Meilisearch to be built without the huge specialized
tokenizations that took up 90% of the Meilisearch binary size.
Unfortunately, due to some recent changes, this doesn't work anymore.
The problem lies in excessive use of the `default` feature flag, which
infects the dependency graph.

Instead of adding `default-features = false` here and there, it's easier
and more future-proof to not declare `default` in `milli` and
`meilisearch-types`. I've renamed it to `all-tokenizations`, which also
makes it a bit clearer what it's about.
2023-05-04 15:45:40 +02:00
4919774f2e Merge #3570
3570: Get documents by filter r=irevoire a=dureuill

# Pull Request

## Related issue

Associated spec: https://github.com/meilisearch/specifications/pull/234

None really; this is more of an extension of #3477: since that issue will let us delete documents by filter, it makes sense to also be able to get documents by filter.

## What does this PR do?

### User standpoint

- Add a new `filter` URL parameter to `GET /indexes/{:indexUid}/documents` and a new `POST /indexes/{:indexUid}/documents/fetch` route accepting the same `offset`, `limit`, `fields`, and `filter` parameters

### Implementation standpoint

- Add a new `Index::iter_documents` method to iterate over a set of documents rather than returning them as a vector (see the sketch after this list).
- Rewrite the other `Index::*documents` methods to use the new `Index::iter_documents` method.
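
A minimal sketch of the shape of that refactor, using hypothetical simplified types (in milli the ids would come from a `RoaringBitmap` and the documents are obkv buffers):

```rust
use std::collections::BTreeMap;

type DocumentId = u32;
type Document = String;

struct Index {
    store: BTreeMap<DocumentId, Document>,
}

impl Index {
    // The new primitive: stream documents instead of collecting them.
    fn iter_documents(&self, ids: Vec<DocumentId>) -> impl Iterator<Item = (DocumentId, &Document)> {
        ids.into_iter().filter_map(move |id| self.store.get(&id).map(|doc| (id, doc)))
    }

    // The pre-existing vector-returning method becomes a thin wrapper.
    fn documents(&self, ids: Vec<DocumentId>) -> Vec<(DocumentId, &Document)> {
        self.iter_documents(ids).collect()
    }
}

fn main() {
    let index = Index { store: BTreeMap::from([(1, "a".into()), (2, "b".into())]) };
    // Handy for `POST /indexes/{:indexUid}/documents/fetch`: offset/limit apply lazily.
    let page: Vec<_> = index.iter_documents(vec![1, 2]).skip(1).take(1).collect();
    println!("{:?} / {:?}", page, index.documents(vec![1, 2]));
}
```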

## Usage

<details>
<summary>
Sample request and response
</summary>

```
curl -X POST 'http://localhost:7700/indexes/index-1101/documents/fetch' -H 'Content-Type: application/json' --data-binary '{ "filter": "genres = Comedy", "limit": 3, "offset": 8000}' | jsonxf
```

```json
{
  "results": [
    {
      "id": 326126,
      "title": "Bad Exorcists",
      "overview": "A trio of awkward teens intend to win a horror festival by making their own movie, but wind up getting their actress possessed in the process.",
      "genres": [
        "Horror",
        "Comedy"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/lwd65kPbjFacAw3QSXiwSsW6cFU.jpg",
      "release_date": 1425081600
    },
    {
      "id": 326215,
      "title": "Ooops! Noah is Gone...",
      "overview": "It's the end of the world. A flood is coming. Luckily for Dave and his son Finny, a couple of clumsy Nestrians, an Ark has been built to save all animals. But as it turns out, Nestrians aren't allowed. Sneaking on board with the involuntary help of Hazel and her daughter Leah, two Grymps, they think they're safe. Until the curious kids fall off the Ark. Now Finny and Leah struggle to survive the flood and hungry predators and attempt to reach the top of a mountain, while Dave and Hazel must put aside their differences, turn the Ark around and save their kids. It's definitely not going to be smooth sailing.",
      "genres": [
        "Animation",
        "Adventure",
        "Comedy",
        "Family"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/gEJXHgpiKh89Vwjc4XUY5CIgUdB.jpg",
      "release_date": 1427328000
    },
    {
      "id": 326241,
      "title": "For Here or to Go?",
      "overview": "An aspiring Indian tech entrepreneur in the Silicon Valley finds himself unexpectedly battling the bizarre American immigration system to keep his dream alive or prepare to return home forever.",
      "genres": [
        "Drama",
        "Comedy"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/ff8WaA7ItBgl36kdT232i0d0Fnq.jpg",
      "release_date": 1490918400
    }
  ],
  "offset": 8000,
  "limit": 3,
  "total": 9331
}
```

<img width="1348" alt="Capture d’écran 2023-03-08 à 10 09 04" src="https://user-images.githubusercontent.com/41078892/223670905-6932b79b-f9b8-4a41-b59e-be2171705b7d.png">



</details>

# Draft status

- [ ] Route naming: having one route be `GET /indexes/{:indexUid}/documents` and the other `POST /indexes/{:indexUid}/documents/fetch` is suboptimal (also, technically a breaking change for documents with `fetch` as uid?), but `POST /indexes/{:indexUid}/documents` is already used to insert documents.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-04 12:54:26 +00:00
a3da680ce6 Update meilisearch/tests/documents/errors.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-05-04 14:51:17 +02:00
11e394dba1 merge the document fetch and get error codes 2023-05-04 15:39:49 +02:00
469d2f2a9c fix the fields field of the POST fetch document API 2023-05-04 15:34:09 +02:00
ce6507d20c improve the test of the get document by filter 2023-05-04 15:34:09 +02:00
b92da5d15a add a big test on the get document by filter of the get route 2023-05-04 15:34:09 +02:00
ed3dfbe729 add error codes and tests 2023-05-04 15:34:08 +02:00
441641397b Implement document get with filters 2023-05-04 15:32:34 +02:00
a35d3fc708 Add Index::iter_documents 2023-05-04 15:31:54 +02:00
745c1a2668 Make parse_filter pub 2023-05-04 15:31:53 +02:00
a95128df6b Merge #3550
3550: Delete documents by filter r=irevoire a=dureuill

# Prototype `prototype-delete-by-filter-0`

Usage:
A new route is available under `POST /indexes/{index_uid}/documents/delete` that allows you to delete your documents by filter.
The expected payload looks like this:
```json
{
  "filter": "doggo = bernese"
}
```

It'll then enqueue a task in your task queue that'll delete all the documents matching this filter once it's processed.
Here is an example of the associated details:
```json
  "details": {
    "deletedDocuments": 53,
    "originalFilter": "\"doggo = bernese\""
  }
```

----------


# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/issues/3477

## What does this PR do?

### User standpoint

- Modifies the `/indexes/{:indexUid}/documents/delete-batch` route to accept either the existing array of document ids, or a JSON object with a `filter` field representing a filter to apply. If the latter variant is used, any document matching the filter will be deleted.

### Implementation standpoint

- (processing time version) Adds a new BatchKind that is not autobatchable and that performs the delete by filter
- Reuse the `documentDeletion` task with a new `originalFilter` detail that replaces the `providedIds` detail.

## Example

<details>
<summary>Sample request, response and task result</summary>

Request:

```
curl \
  -X POST 'http://localhost:7700/indexes/index-10/documents/delete-batch' \
  -H 'Content-Type: application/json' \
  --data-binary '{ "filter" : "mass = 600"}'
```

Response:

```
{
  "taskUid": 3902,
  "indexUid": "index-10",
  "status": "enqueued",
  "type": "documentDeletion",
  "enqueuedAt": "2023-02-28T20:50:31.667502Z"
}
```

Task log:

```json
    {
      "uid": 3906,
      "indexUid": "index-12",
      "status": "succeeded",
      "type": "documentDeletion",
      "canceledBy": null,
      "details": {
        "deletedDocuments": 3,
        "originalFilter": "\"mass = 600\""
      },
      "error": null,
      "duration": "PT0.001819S",
      "enqueuedAt": "2023-03-07T08:57:20.11387Z",
      "startedAt": "2023-03-07T08:57:20.115895Z",
      "finishedAt": "2023-03-07T08:57:20.117714Z"
    }
```

</details>

## Draft status

- [ ] Error handling
- [ ] Analytics
- [ ] Do we want to reuse the `delete-batch` route in this way, or create a new route instead?
- [ ] Should the filter be applied at request time or when the deletion task is processed? 
  - The first commit in this PR applies the filter at request time, meaning that even if a document is modified in a way that no longer matches the filter in a later update, it will be deleted as long as the deletion task is processed after that update. 
  - The other commits in this PR apply the filter only when the asynchronous deletion task is processed, meaning that documents that match the filter at processing time are deleted even if they didn't match the filter at request time.
- [ ] If keeping the filter at request time, find a more elegant way to recover the user document ids from the internal document ids. The current way implemented in the first commit of this PR involves getting all the documents matching the filter, looking for the value of their primary key, and turning it into a string by copy-pasting routines found in milli...
- [ ] Security consideration, if any
- [ ] Fix the tests (but waiting until product questions are resolved)
- [ ] Add delete by filter specific tests



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-05-04 10:44:41 +00:00
e0537c3870 Merge #3720
3720: Change links of docs everywhere r=curquiza a=curquiza

Completely fixes #3668 

Co-authored-by: curquiza <clementine@meilisearch.com>
2023-05-04 10:07:41 +00:00
da220294f6 Merge #3639
3639: Add a dedicated error variant for planned failures in index scheduler tests r=Kerollmops a=Sufflope

# Pull Request

## Related issue
Fixes #3086

## What does this PR do?
- Add a dedicated test variant in test cfg to avoid reusing a misleading existing error

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Jean-Sébastien Bour <jean-sebastien@bour.name>
2023-05-04 09:33:57 +00:00
d5059520aa Fix typo 2023-05-03 22:27:03 +02:00
1c3642c9b2 Fix deletion per filter analytics 2023-05-03 22:26:51 +02:00
d2d2bacaf2 add a test on the complex filter 2023-05-03 20:07:08 +02:00
30edba3497 Update links of the docs 2023-05-03 19:14:57 +02:00
84e7bd9342 Fix test after rebase on filter additions 2023-05-03 17:51:28 +02:00
2b74e4d116 Fix test 2023-05-03 17:41:50 +02:00
b5fe0b2b07 fix the details 2023-05-03 17:41:50 +02:00
0f0cd2d929 handle the array of array form of filter in the dumps 2023-05-03 17:41:50 +02:00
fc8c1d118d fix the analytics 2023-05-03 17:41:50 +02:00
0548ab9038 create and use the error code 2023-05-03 17:41:50 +02:00
143acb9cdc update the tests 2023-05-03 17:41:49 +02:00
4b92f1b269 wip 2023-05-03 17:41:49 +02:00
c12a1cd956 test all the error messages 2023-05-03 17:41:49 +02:00
8af8aa5a33 add a test 2023-05-03 17:41:49 +02:00
6df2ba93a9 remove one useless txn 2023-05-03 17:41:49 +02:00
3680a6bf1e extract impl to a function 2023-05-03 17:41:49 +02:00
732c52093d Processing time without autobatching implementation 2023-05-03 17:41:48 +02:00
05cc463fbc Draft implementation of filter support for /delete-by-batch route 2023-05-03 17:41:48 +02:00
d09b771bce Add a dedicated error variant for planned failures in index scheduler tests
Fixes #3086
2023-05-02 14:37:20 +02:00
72 changed files with 2178 additions and 497 deletions

View File

@ -19,8 +19,8 @@ If Meilisearch does not offer optimized support for your language, please consid
## Assumptions
1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
2. **You've read the Meilisearch [documentation](https://www.meilisearch.com/docs).**
3. **You know about the [Meilisearch community on Discord](https://discord.meilisearch.com).
Please use this for help.**
## How to Contribute

Cargo.lock (generated)
View File

@ -463,7 +463,7 @@ checksum = "b645a089122eccb6111b4f81cbc1a49f5900ac4666bb93ac027feaecf15607bf"
[[package]]
name = "benchmarks"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"anyhow",
"bytes",
@ -1209,7 +1209,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"anyhow",
"big_s",
@ -1428,7 +1428,7 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"faux",
"tempfile",
@ -1450,7 +1450,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"insta",
"nom",
@ -1476,7 +1476,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"criterion",
"serde_json",
@ -1794,7 +1794,7 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "heed"
version = "0.12.5"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
dependencies = [
"byteorder",
"heed-traits",
@ -1811,12 +1811,12 @@ dependencies = [
[[package]]
name = "heed-traits"
version = "0.7.0"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
[[package]]
name = "heed-types"
version = "0.7.2"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.5#4158a6c484752afaaf9e2530a6ee0e7ab0f24ee8"
source = "git+https://github.com/meilisearch/heed?tag=v0.12.6#8c5b94225fc949c02bb7b900cc50ffaf6b584b1e"
dependencies = [
"bincode",
"heed-traits",
@ -1959,7 +1959,7 @@ dependencies = [
[[package]]
name = "index-scheduler"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"anyhow",
"big_s",
@ -2113,7 +2113,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"criterion",
"serde_json",
@ -2539,7 +2539,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"insta",
"md5",
@ -2548,7 +2548,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"actix-cors",
"actix-http",
@ -2636,7 +2636,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"base64 0.21.0",
"enum-iterator",
@ -2655,7 +2655,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"actix-web",
"anyhow",
@ -2709,7 +2709,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"big_s",
"bimap",
@ -3064,7 +3064,7 @@ checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
[[package]]
name = "permissive-json-pointer"
version = "1.1.1"
version = "1.2.0"
dependencies = [
"big_s",
"serde_json",

View File

@ -17,7 +17,7 @@ members = [
]
[workspace.package]
version = "1.1.1"
version = "1.2.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@ -7,8 +7,8 @@
<a href="https://www.meilisearch.com">Website</a> |
<a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
<a href="https://blog.meilisearch.com">Blog</a> |
<a href="https://meilisearch.com/docs">Documentation</a> |
<a href="https://meilisearch.com/docs/faq">FAQ</a> |
<a href="https://www.meilisearch.com/docs">Documentation</a> |
<a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
<a href="https://discord.meilisearch.com">Discord</a>
</h4>
@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
## ✨ Features
- **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://www.meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://www.meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
- **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
- **Easy to install, deploy, and maintain**
## 📖 Documentation
You can consult Meilisearch's documentation at [https://meilisearch.com/docs](https://meilisearch.com/docs/).
You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).
## 🚀 Getting started
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://meilisearch.com/docs/learn/getting_started/quick_start) guide.
For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.
You may also want to check out [Meilisearch 101](https://meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
## ☁️ Meilisearch cloud
@ -66,7 +66,7 @@ Let us manage your infrastructure so you can focus on integrating a great search
Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!
Take a look at the complete [Meilisearch integration list](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks).
Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).
[![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)
@ -74,17 +74,17 @@ Take a look at the complete [Meilisearch integration list](https://meilisearch.c
Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://meilisearch.com/docs/learn/advanced/filtering), [sorting](https://meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://meilisearch.com/docs/learn/security/tenant_tokens).
We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/advanced/filtering), [sorting](https://www.meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://meilisearch.com/docs/learn/core_concepts/indexes).
Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).
## 📊 Telemetry
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
## 📫 Get in touch!

View File

@ -13,7 +13,7 @@ license.workspace = true
[dependencies]
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli", default-features = false }
milli = { path = "../milli" }
mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@ -31,7 +31,7 @@ flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/default"]
default = ["milli/all-tokenizations"]
[[bench]]
name = "search_songs"

View File

@ -119,9 +119,9 @@ _[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceansp
### Movies
`movies` is a really small dataset we uses as our example in the [getting started](https://docs.meilisearch.com/learn/getting_started/)
`movies` is a really small dataset we uses as our example in the [getting started](https://www.meilisearch.com/docs/learn/getting_started/quick_start)
_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
_[Download the `movies` dataset](https://www.meilisearch.com/movies.json)._
### All Countries

View File

@ -1,43 +1,43 @@
# This file shows the default configuration of Meilisearch.
# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables
db_path = "./data.ms"
# Designates the location where database files will be created and retrieved.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
env = "development"
# Configures the instance's environment. Value must be either `production` or `development`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment
# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
http_addr = "localhost:7700"
# The address on which the HTTP server will listen.
# master_key = "YOUR_MASTER_KEY_VALUE"
# Sets the instance's master key, automatically protecting all routes except GET /health.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key
# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
# no_analytics = true
# Deactivates Meilisearch's built-in telemetry when provided.
# Meilisearch automatically collects data from all instances that do not opt out using this flag.
# All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics
# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
http_payload_size_limit = "100 MB"
# Sets the maximum size of accepted payloads.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size
# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
log_level = "INFO"
# Defines how much detail should be present in Meilisearch's logs.
# Meilisearch currently supports six log levels, listed in order of increasing verbosity: `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
# max_indexing_memory = "2 GiB"
# Sets the maximum amount of RAM Meilisearch can use when indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
# max_indexing_threads = 4
# Sets the maximum number of threads Meilisearch can use during indexing.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
#############
### DUMPS ###
@ -45,19 +45,19 @@ log_level = "INFO"
dump_dir = "dumps/"
# Sets the directory where Meilisearch will create dump files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#dump-directory
# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
# import_dump = "./path/to/my/file.dump"
# Imports the dump file located at the specified path. Path must point to a .dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
ignore_missing_dump = false
# Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
ignore_dump_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
#################
@ -68,23 +68,23 @@ schedule_snapshot = false
# Enables scheduled snapshots when true, disable when false (the default).
# If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
# between each snapshot, in seconds.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
snapshot_dir = "snapshots/"
# Sets the directory where Meilisearch will store snapshots.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
# import_snapshot = "./path/to/my/snapshot"
# Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot
# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
ignore_missing_snapshot = false
# Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
ignore_snapshot_if_db_exists = false
# Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
###########
@ -93,31 +93,31 @@ ignore_snapshot_if_db_exists = false
# ssl_auth_path = "./path/to/root"
# Enables client authentication in the specified path.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
# ssl_cert_path = "./path/to/certfile"
# Sets the server's SSL certificates.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
# ssl_key_path = "./path/to/private-key"
# Sets the server's SSL key files.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
# ssl_ocsp_path = "./path/to/ocsp-file"
# Sets the server's OCSP file.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
ssl_require_auth = false
# Makes SSL authentication mandatory.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
ssl_resumption = false
# Activates SSL session resumption.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
ssl_tickets = false
# Activates SSL tickets.
# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
#############################
### Experimental features ###
@ -127,4 +127,5 @@ experimental_enable_metrics = false
# Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
# Enables the Prometheus metrics on the `GET /metrics` endpoint.
experimental_reduce_indexing_memory_usage = false
# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>

View File

@ -101,6 +101,9 @@ pub enum KindDump {
documents_ids: Vec<String>,
},
DocumentClear,
DocumentDeletionByFilter {
filter: serde_json::Value,
},
Settings {
settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
is_deletion: bool,
@ -166,6 +169,9 @@ impl From<KindWithContent> for KindDump {
KindWithContent::DocumentDeletion { documents_ids, .. } => {
KindDump::DocumentDeletion { documents_ids }
}
KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
KindDump::DocumentDeletionByFilter { filter: filter_expr }
}
KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
KindWithContent::SettingsUpdate {
new_settings,

View File

@ -25,6 +25,7 @@ enum AutobatchKind {
primary_key: Option<String>,
},
DocumentDeletion,
DocumentDeletionByFilter,
DocumentClear,
Settings {
allow_index_creation: bool,
@ -64,6 +65,9 @@ impl From<KindWithContent> for AutobatchKind {
} => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
KindWithContent::DocumentDeletionByFilter { .. } => {
AutobatchKind::DocumentDeletionByFilter
}
KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
AutobatchKind::Settings {
allow_index_creation: allow_index_creation && !is_deletion,
@ -97,6 +101,9 @@ pub enum BatchKind {
DocumentDeletion {
deletion_ids: Vec<TaskId>,
},
DocumentDeletionByFilter {
id: TaskId,
},
ClearAndSettings {
other: Vec<TaskId>,
allow_index_creation: bool,
@ -195,6 +202,9 @@ impl BatchKind {
K::DocumentDeletion => {
(Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
}
K::DocumentDeletionByFilter => {
(Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
}
K::Settings { allow_index_creation } => (
Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
allow_index_creation,
@ -212,7 +222,7 @@ impl BatchKind {
match (self, kind) {
// We don't batch any of these operations
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this),
(this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
// We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
(this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
Break(this)
@ -471,7 +481,8 @@ impl BatchKind {
BatchKind::IndexCreation { .. }
| BatchKind::IndexDeletion { .. }
| BatchKind::IndexUpdate { .. }
| BatchKind::IndexSwap { .. },
| BatchKind::IndexSwap { .. }
| BatchKind::DocumentDeletionByFilter { .. },
_,
) => {
unreachable!()

View File

@ -24,13 +24,15 @@ use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -65,6 +67,10 @@ pub(crate) enum Batch {
op: IndexOperation,
must_create_index: bool,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
},
IndexCreation {
index_uid: String,
primary_key: Option<String>,
@ -149,6 +155,7 @@ impl Batch {
| Batch::TaskDeletion(task)
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexDocumentDeletionByFilter { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@ -187,7 +194,8 @@ impl Batch {
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
| IndexDeletion { index_uid, .. }
| IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
}
}
}
@ -227,6 +235,18 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentDeletionByFilter { id } => {
let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
match &task.kind {
KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
Ok(Some(Batch::IndexDocumentDeletionByFilter {
index_uid: index_uid.clone(),
task,
}))
}
_ => unreachable!(),
}
}
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
@ -867,6 +887,51 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let (index_uid, filter) =
if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
&task.kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
let index = {
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, index_uid)?
};
let deleted_documents = delete_document_by_filter(filter, index);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
}) = task.details
{
original_filter
} else {
// In the case of a `DocumentDeletionByFilter`, the details MUST be set
unreachable!();
};
match deleted_documents {
Ok(deleted_documents) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(deleted_documents),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: Some(0),
});
task.error = Some(e.into());
}
}
Ok(vec![task])
}
Batch::IndexCreation { index_uid, primary_key, task } => {
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
@ -1421,3 +1486,25 @@ impl IndexScheduler {
Ok(content_files_to_delete)
}
}
fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
let filter = Filter::from_json(filter)?;
Ok(if let Some(filter) = filter {
let mut wtxn = index.write_txn()?;
let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
delete_operation.delete_documents(&candidates);
let deleted_documents =
delete_operation.execute().map(|result| result.deleted_documents)?;
wtxn.commit()?;
deleted_documents
} else {
0
})
}
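For illustration only (not part of the diff): a minimal sketch of driving the new helper above from a caller that already holds an open `Index` and can propagate the scheduler's `Result`; the `index` binding and the concrete filter are assumptions.

// Hypothetical usage sketch of `delete_document_by_filter`.
let filter = serde_json::json!("color = blue");
let deleted_documents = delete_document_by_filter(&filter, index)?;
// If `Filter::from_json` yields no filter, the helper returns 0 without touching the index.
println!("deleted {deleted_documents} documents");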

View File

@ -46,6 +46,8 @@ impl From<DateField> for Code {
#[allow(clippy::large_enum_variant)]
#[derive(Error, Debug)]
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@ -134,11 +136,59 @@ pub enum Error {
TaskDatabaseUpdate(Box<Self>),
#[error(transparent)]
HeedTransaction(heed::Error),
#[cfg(test)]
#[error("Planned failure for tests.")]
PlannedFailure,
}
impl Error {
pub fn is_recoverable(&self) -> bool {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
| Error::SwapIndexNotFound(_)
| Error::NoSpaceLeftInTaskQueue
| Error::SwapIndexesNotFound(_)
| Error::CorruptedDump
| Error::InvalidTaskDate { .. }
| Error::InvalidTaskUids { .. }
| Error::InvalidTaskStatuses { .. }
| Error::InvalidTaskTypes { .. }
| Error::InvalidTaskCanceledBy { .. }
| Error::InvalidIndexUid { .. }
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
| Error::ProcessBatchPanicked
| Error::FileStore(_)
| Error::IoError(_)
| Error::Persist(_)
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_) => false,
#[cfg(test)]
Error::PlannedFailure => false,
}
}
pub fn with_custom_error_code(self, code: Code) -> Self {
Self::WithCustomErrorCode(code, Box::new(self))
}
}
impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
@ -171,6 +221,9 @@ impl ErrorCode for Error {
Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}
}
}
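A small hedged illustration of how the new `WithCustomErrorCode` wrapper composes (only names defined above are used): the displayed message comes from the wrapped error, while the reported code is overridden.

// Hypothetical: force a scheduler error to surface as `invalid_document_filter`.
let wrapped = Error::IndexNotFound("doggos".to_string())
    .with_custom_error_code(Code::InvalidDocumentFilter);
assert!(matches!(wrapped.error_code(), Code::InvalidDocumentFilter)); // code is overridden
assert_eq!(wrapped.to_string(), "Index `doggos` not found."); // message stays the inner one, thanks to #[error("{1}")]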

View File

@ -5,6 +5,7 @@ use std::collections::BTreeMap;
use std::path::Path;
use std::time::Duration;
use meilisearch_types::heed::flags::Flags;
use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
use meilisearch_types::milli::Index;
use time::OffsetDateTime;
@ -53,6 +54,7 @@ pub struct IndexMap {
pub struct ClosingIndex {
uuid: Uuid,
closing_event: EnvClosingEvent,
enable_mdb_writemap: bool,
map_size: usize,
generation: usize,
}
@ -68,6 +70,7 @@ impl ClosingIndex {
pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
uuid: self.uuid,
enable_mdb_writemap: self.enable_mdb_writemap,
map_size: self.map_size,
generation: self.generation,
})
@ -76,6 +79,7 @@ impl ClosingIndex {
pub struct ReopenableIndex {
uuid: Uuid,
enable_mdb_writemap: bool,
map_size: usize,
generation: usize,
}
@ -103,7 +107,7 @@ impl ReopenableIndex {
return Ok(());
}
map.unavailable.remove(&self.uuid);
map.create(&self.uuid, path, None, self.map_size)?;
map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
}
Ok(())
}
@ -170,16 +174,17 @@ impl IndexMap {
uuid: &Uuid,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
if !matches!(self.get_unavailable(uuid), Missing) {
panic!("Attempt to open an index that was unavailable");
}
let index = create_or_open_index(path, date, map_size)?;
let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
match self.available.insert(*uuid, index.clone()) {
InsertionOutcome::InsertedNew => (),
InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
self.close(evicted_uuid, evicted_index, 0);
self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0);
}
InsertionOutcome::Replaced(_) => {
panic!("Attempt to open an index that was already opened")
@ -212,17 +217,30 @@ impl IndexMap {
/// | Closing | Closing |
/// | Available | Closing |
///
pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
pub fn close_for_resize(
&mut self,
uuid: &Uuid,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let Some(index) = self.available.remove(uuid) else { return; };
self.close(*uuid, index, map_size_growth);
self.close(*uuid, index, enable_mdb_writemap, map_size_growth);
}
fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
fn close(
&mut self,
uuid: Uuid,
index: Index,
enable_mdb_writemap: bool,
map_size_growth: usize,
) {
let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
let closing_event = index.prepare_for_closing();
let generation = self.next_generation();
self.unavailable
.insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation }));
self.unavailable.insert(
uuid,
Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }),
);
}
/// Attempts to delete an index.
@ -282,11 +300,15 @@ impl IndexMap {
fn create_or_open_index(
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if enable_mdb_writemap {
unsafe { options.flag(Flags::MdbWriteMap) };
}
if let Some((created, updated)) = date {
Ok(Index::new_with_creation_dates(options, path, created, updated)?)
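As a hedged aside (not part of the diff): MDB_WRITEMAP makes LMDB write through a writable memory map instead of allocating write buffers, which lowers memory usage during indexing at the cost of weaker protection against stray writes to the mapped region. A standalone sketch of the flag, using the same heed API as above:

// Hypothetical, isolated version of the branch above; the map size is an arbitrary example.
let mut options = EnvOpenOptions::new();
options.map_size(100 * 1024 * 1024); // 100 MiB
options.max_readers(1024);
// Same safety contract as in `create_or_open_index`; only reached when the
// experimental `--experimental-reduce-indexing-memory-usage` flag is set.
unsafe { options.flag(Flags::MdbWriteMap) };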

View File

@ -66,6 +66,8 @@ pub struct IndexMapper {
index_base_map_size: usize,
/// The quantity by which the map size of an index is incremented upon reopening, in bytes.
index_growth_amount: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
enable_mdb_writemap: bool,
pub indexer_config: Arc<IndexerConfig>,
}
@ -123,15 +125,22 @@ impl IndexMapper {
index_base_map_size: usize,
index_growth_amount: usize,
index_count: usize,
enable_mdb_writemap: bool,
indexer_config: IndexerConfig,
) -> Result<Self> {
let mut wtxn = env.write_txn()?;
let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
wtxn.commit()?;
Ok(Self {
index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
index_mapping: env.create_database(Some(INDEX_MAPPING))?,
index_stats: env.create_database(Some(INDEX_STATS))?,
index_mapping,
index_stats,
base_path,
index_base_map_size,
index_growth_amount,
enable_mdb_writemap,
indexer_config: Arc::new(indexer_config),
})
}
@ -162,6 +171,7 @@ impl IndexMapper {
&uuid,
&index_path,
date,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
@ -273,7 +283,11 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount);
self.index_map.write().unwrap().close_for_resize(
&uuid,
self.enable_mdb_writemap,
self.index_growth_amount,
);
Ok(())
}
@ -338,6 +352,7 @@ impl IndexMapper {
&uuid,
&index_path,
None,
self.enable_mdb_writemap,
self.index_base_map_size,
)?;
}

View File

@ -184,6 +184,9 @@ fn snapshot_details(d: &Details) -> String {
provided_ids: received_document_ids,
deleted_documents,
} => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"),
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!(
"{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}"
),
Details::ClearAll { deleted_documents } => {
format!("{{ deleted_documents: {deleted_documents:?} }}")
},

View File

@ -233,6 +233,8 @@ pub struct IndexSchedulerOptions {
pub task_db_size: usize,
/// The size, in bytes, with which each meilisearch index is opened the first time.
pub index_base_map_size: usize,
/// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
pub enable_mdb_writemap: bool,
/// The size, in bytes, by which the map size of an index is increased when it is resized due to being full.
pub index_growth_amount: usize,
/// The number of indexes that can be concurrently opened in memory.
@ -374,6 +376,11 @@ impl IndexScheduler {
std::fs::create_dir_all(&options.indexes_path)?;
std::fs::create_dir_all(&options.dumps_path)?;
if cfg!(windows) && options.enable_mdb_writemap {
// Programmer error if this happens: in normal use, passing this option on Windows is already rejected in main.
panic!("Windows doesn't support the MDB_WRITEMAP LMDB option");
}
let task_db_size = clamp_to_page_size(options.task_db_size);
let budget = if options.indexer_config.skip_index_budget {
IndexBudget {
@ -396,25 +403,37 @@ impl IndexScheduler {
.open(options.tasks_path)?;
let file_store = FileStore::new(&options.update_file_path)?;
let mut wtxn = env.write_txn()?;
let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?;
let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?;
let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?;
let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?;
let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?;
let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?;
let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?;
let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
wtxn.commit()?;
// allow unreachable_code to get rid of the warning in the case of a test build.
let this = Self {
must_stop_processing: MustStopProcessing::default(),
processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
file_store,
all_tasks: env.create_database(Some(db_name::ALL_TASKS))?,
status: env.create_database(Some(db_name::STATUS))?,
kind: env.create_database(Some(db_name::KIND))?,
index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
canceled_by: env.create_database(Some(db_name::CANCELED_BY))?,
enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?,
started_at: env.create_database(Some(db_name::STARTED_AT))?,
finished_at: env.create_database(Some(db_name::FINISHED_AT))?,
all_tasks,
status,
kind,
index_tasks,
canceled_by,
enqueued_at,
started_at,
finished_at,
index_mapper: IndexMapper::new(
&env,
options.indexes_path,
budget.map_size,
options.index_growth_amount,
budget.index_count,
options.enable_mdb_writemap,
options.indexer_config,
)?,
env,
@ -540,13 +559,7 @@ impl IndexScheduler {
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
if matches!(
e,
Error::CorruptedTaskQueue
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_)
| Error::CreateBatch(_)
) {
if !e.is_recoverable() {
std::thread::sleep(Duration::from_secs(1));
}
}
@ -1270,6 +1283,12 @@ impl<'a> Dump<'a> {
documents_ids,
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
KindDump::DocumentDeletionByFilter { filter } => {
KindWithContent::DocumentDeletionByFilter {
filter_expr: filter,
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
}
}
KindDump::DocumentClear => KindWithContent::DocumentClear {
index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
},
@ -1471,6 +1490,7 @@ mod tests {
dumps_path: tempdir.path().join("dumps"),
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
index_growth_amount: 1000 * 1000, // 1 MB
index_count: 5,
indexer_config,
@ -1500,7 +1520,7 @@ mod tests {
(index_scheduler, index_scheduler_handle)
}
/// Return a [`CorruptedTaskQueue`](Error::CorruptedTaskQueue) error if a failure is planned
/// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned
/// for the given location and current run loop iteration.
pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> {
if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location))
@ -1509,7 +1529,7 @@ mod tests {
FailureLocation::PanicInsideProcessBatch => {
panic!("simulated panic")
}
_ => Err(Error::CorruptedTaskQueue),
_ => Err(Error::PlannedFailure),
}
} else {
Ok(())

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
### All Tasks:
0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
1 {uid: 1, status: succeeded, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
----------------------------------------------------------------------
### Status:
enqueued []

View File

@ -239,6 +239,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
match &mut task.kind {
K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
K::DocumentClear { index_uid } => index_uids.push(index_uid),
K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
K::IndexDeletion { index_uid } => index_uids.push(index_uid),
@ -464,6 +465,29 @@ impl IndexScheduler {
}
}
}
Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
assert_eq!(kind.as_kind(), Kind::DocumentDeletionByFilter);
let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
ref index_uid,
ref filter_expr,
} = kind
{
(index_uid, filter_expr)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.is_some());
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
}
}
}
Details::ClearAll { deleted_documents } => {
assert!(matches!(
kind.as_kind(),

View File

@ -55,9 +55,11 @@ impl HeedAuthStore {
let path = path.as_ref().join(AUTH_DB_PATH);
create_dir_all(&path)?;
let env = Arc::new(open_auth_store_env(path.as_ref())?);
let keys = env.create_database(Some(KEY_DB_NAME))?;
let mut wtxn = env.write_txn()?;
let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
let action_keyid_index_expiration =
env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
wtxn.commit()?;
Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
}

View File

@ -22,7 +22,7 @@ file-store = { path = "../file-store" }
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.5.10"
milli = { path = "../milli", default-features = false }
milli = { path = "../milli" }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" }
[features]
# all specialized tokenizations
default = ["milli/default"]
all-tokenizations = ["milli/all-tokenizations"]
# chinese specialized tokenization
chinese = ["milli/chinese"]

View File

@ -150,6 +150,7 @@ make_missing_field_convenience_builder!(MissingApiKeyActions, missing_api_key_ac
make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_expires_at);
make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes);
make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes);
make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter);
// Integrate a sub-error into a [`DeserrError`] by taking its error message but using
// the default error code (C) from `Self`

View File

@ -214,6 +214,8 @@ InvalidApiKeyUid , InvalidRequest , BAD_REQUEST ;
InvalidContentType , InvalidRequest , UNSUPPORTED_MEDIA_TYPE ;
InvalidDocumentCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFields , InvalidRequest , BAD_REQUEST ;
MissingDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentFilter , InvalidRequest , BAD_REQUEST ;
InvalidDocumentGeoField , InvalidRequest , BAD_REQUEST ;
InvalidDocumentId , InvalidRequest , BAD_REQUEST ;
InvalidDocumentLimit , InvalidRequest , BAD_REQUEST ;
@ -315,6 +317,7 @@ impl ErrorCode for milli::Error {
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId

View File

@ -49,6 +49,7 @@ impl Task {
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid }
| SettingsUpdate { index_uid, .. }
| IndexCreation { index_uid, .. }
@ -67,6 +68,7 @@ impl Task {
match self.kind {
KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
KindWithContent::DocumentDeletion { .. }
| KindWithContent::DocumentDeletionByFilter { .. }
| KindWithContent::DocumentClear { .. }
| KindWithContent::SettingsUpdate { .. }
| KindWithContent::IndexDeletion { .. }
@ -96,6 +98,10 @@ pub enum KindWithContent {
index_uid: String,
documents_ids: Vec<String>,
},
DocumentDeletionByFilter {
index_uid: String,
filter_expr: serde_json::Value,
},
DocumentClear {
index_uid: String,
},
@ -145,6 +151,7 @@ impl KindWithContent {
match self {
KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate,
KindWithContent::IndexCreation { .. } => Kind::IndexCreation,
@ -168,6 +175,7 @@ impl KindWithContent {
| TaskDeletion { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentDeletion { index_uid, .. }
| DocumentDeletionByFilter { index_uid, .. }
| DocumentClear { index_uid }
| SettingsUpdate { index_uid, .. }
| IndexCreation { index_uid, .. }
@ -200,6 +208,12 @@ impl KindWithContent {
deleted_documents: None,
})
}
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
Some(Details::DocumentDeletionByFilter {
original_filter: filter_expr.to_string(),
deleted_documents: None,
})
}
KindWithContent::DocumentClear { .. } | KindWithContent::IndexDeletion { .. } => {
Some(Details::ClearAll { deleted_documents: None })
}
@ -242,6 +256,12 @@ impl KindWithContent {
deleted_documents: Some(0),
})
}
KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
Some(Details::DocumentDeletionByFilter {
original_filter: filter_expr.to_string(),
deleted_documents: Some(0),
})
}
KindWithContent::DocumentClear { .. } => {
Some(Details::ClearAll { deleted_documents: None })
}
@ -282,6 +302,7 @@ impl From<&KindWithContent> for Option<Details> {
})
}
KindWithContent::DocumentDeletion { .. } => None,
KindWithContent::DocumentDeletionByFilter { .. } => None,
KindWithContent::DocumentClear { .. } => None,
KindWithContent::SettingsUpdate { new_settings, .. } => {
Some(Details::SettingsUpdate { settings: new_settings.clone() })
@ -374,6 +395,7 @@ impl std::error::Error for ParseTaskStatusError {}
pub enum Kind {
DocumentAdditionOrUpdate,
DocumentDeletion,
DocumentDeletionByFilter,
SettingsUpdate,
IndexCreation,
IndexDeletion,
@ -390,6 +412,7 @@ impl Kind {
match self {
Kind::DocumentAdditionOrUpdate
| Kind::DocumentDeletion
| Kind::DocumentDeletionByFilter
| Kind::SettingsUpdate
| Kind::IndexCreation
| Kind::IndexDeletion
@ -407,6 +430,7 @@ impl Display for Kind {
match self {
Kind::DocumentAdditionOrUpdate => write!(f, "documentAdditionOrUpdate"),
Kind::DocumentDeletion => write!(f, "documentDeletion"),
Kind::DocumentDeletionByFilter => write!(f, "documentDeletionByFilter"),
Kind::SettingsUpdate => write!(f, "settingsUpdate"),
Kind::IndexCreation => write!(f, "indexCreation"),
Kind::IndexDeletion => write!(f, "indexDeletion"),
@ -478,6 +502,7 @@ pub enum Details {
SettingsUpdate { settings: Box<Settings<Unchecked>> },
IndexInfo { primary_key: Option<String> },
DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
ClearAll { deleted_documents: Option<u64> },
TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
@ -493,6 +518,9 @@ impl Details {
*indexed_documents = Some(0)
}
Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
Self::DocumentDeletionByFilter { deleted_documents, .. } => {
*deleted_documents = Some(0)
}
Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
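A hedged sketch of the details stored for a deletion-by-filter task: `original_filter` is the JSON serialization of `filter_expr`, which is why a plain string filter keeps its surrounding quotes in the task snapshots further down.

// Hypothetical illustration mirroring the arms added above.
let filter_expr = serde_json::json!("color = blue");
let _details = Details::DocumentDeletionByFilter {
    original_filter: filter_expr.to_string(), // == "\"color = blue\""
    deleted_documents: None,                  // filled in once the task has been processed
};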

View File

@ -106,7 +106,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
chinese = ["meilisearch-types/chinese"]

View File

@ -5,7 +5,7 @@ use actix_web::HttpRequest;
use meilisearch_types::InstanceUid;
use serde_json::Value;
use super::{find_user_id, Analytics, DocumentDeletionKind};
use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::Opt;
@ -71,6 +71,8 @@ impl Analytics for MockAnalytics {
_request: &HttpRequest,
) {
}
fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {}
fn health_seen(&self, _request: &HttpRequest) {}
}

View File

@ -64,6 +64,13 @@ pub enum DocumentDeletionKind {
PerDocumentId,
ClearAll,
PerBatch,
PerFilter,
}
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DocumentFetchKind {
PerDocumentId,
Normal { with_filter: bool, limit: usize, offset: usize },
}
pub trait Analytics: Sync + Send {
@ -89,6 +96,12 @@ pub trait Analytics: Sync + Send {
request: &HttpRequest,
);
// this method should be called to aggregate a fetch documents request
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a fetch documents request
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
// this method should be called to aggregate a delete documents request
fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);

View File

@ -23,7 +23,9 @@ use tokio::select;
use tokio::sync::mpsc::{self, Receiver, Sender};
use uuid::Uuid;
use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
use super::{
config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
};
use crate::analytics::Analytics;
use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
use crate::routes::indexes::documents::UpdateDocumentsQuery;
@ -72,6 +74,8 @@ pub enum AnalyticsMsg {
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
AggregateGetFetchDocuments(DocumentsFetchAggregator),
AggregatePostFetchDocuments(DocumentsFetchAggregator),
AggregateTasks(TasksAggregator),
AggregateHealth(HealthAggregator),
}
@ -139,6 +143,8 @@ impl SegmentAnalytics {
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
update_documents_aggregator: DocumentsAggregator::default(),
get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
get_tasks_aggregator: TasksAggregator::default(),
health_aggregator: HealthAggregator::default(),
});
@ -205,6 +211,16 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
}
fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
}
fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
}
fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
let aggregate = TasksAggregator::from_query(query, request);
let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
@ -225,6 +241,7 @@ impl super::Analytics for SegmentAnalytics {
struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_reduce_indexing_memory_usage: bool,
db_path: bool,
import_dump: bool,
dump_dir: bool,
@ -258,6 +275,7 @@ impl From<Opt> for Infos {
let Opt {
db_path,
experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
http_addr,
master_key: _,
env,
@ -300,6 +318,7 @@ impl From<Opt> for Infos {
Self {
env,
experimental_enable_metrics,
experimental_reduce_indexing_memory_usage,
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
@ -338,6 +357,8 @@ pub struct Segment {
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
get_fetch_documents_aggregator: DocumentsFetchAggregator,
post_fetch_documents_aggregator: DocumentsFetchAggregator,
get_tasks_aggregator: TasksAggregator,
health_aggregator: HealthAggregator,
}
@ -400,6 +421,8 @@ impl Segment {
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
None => (),
@ -450,6 +473,10 @@ impl Segment {
.into_event(&self.user, "Documents Deleted");
let update_documents = std::mem::take(&mut self.update_documents_aggregator)
.into_event(&self.user, "Documents Updated");
let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched GET");
let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
.into_event(&self.user, "Documents Fetched POST");
let get_tasks =
std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
let health =
@ -473,6 +500,12 @@ impl Segment {
if let Some(update_documents) = update_documents {
let _ = self.batcher.push(update_documents).await;
}
if let Some(get_fetch_documents) = get_fetch_documents {
let _ = self.batcher.push(get_fetch_documents).await;
}
if let Some(post_fetch_documents) = post_fetch_documents {
let _ = self.batcher.push(post_fetch_documents).await;
}
if let Some(get_tasks) = get_tasks {
let _ = self.batcher.push(get_tasks).await;
}
@ -949,6 +982,7 @@ pub struct DocumentsDeletionAggregator {
per_document_id: bool,
clear_all: bool,
per_batch: bool,
per_filter: bool,
}
impl DocumentsDeletionAggregator {
@ -962,6 +996,7 @@ impl DocumentsDeletionAggregator {
DocumentDeletionKind::PerDocumentId => ret.per_document_id = true,
DocumentDeletionKind::ClearAll => ret.clear_all = true,
DocumentDeletionKind::PerBatch => ret.per_batch = true,
DocumentDeletionKind::PerFilter => ret.per_filter = true,
}
ret
@ -981,6 +1016,7 @@ impl DocumentsDeletionAggregator {
self.per_document_id |= other.per_document_id;
self.clear_all |= other.clear_all;
self.per_batch |= other.per_batch;
self.per_filter |= other.per_filter;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -1132,3 +1168,76 @@ impl HealthAggregator {
})
}
}
#[derive(Default, Serialize)]
pub struct DocumentsFetchAggregator {
#[serde(skip)]
timestamp: Option<OffsetDateTime>,
// context
#[serde(rename = "user-agent")]
user_agents: HashSet<String>,
#[serde(rename = "requests.max_limit")]
total_received: usize,
// a call on ../documents/:doc_id
per_document_id: bool,
// if a filter was used
per_filter: bool,
// pagination
#[serde(rename = "pagination.max_limit")]
max_limit: usize,
#[serde(rename = "pagination.max_offset")]
max_offset: usize,
}
impl DocumentsFetchAggregator {
pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
let (limit, offset) = match query {
DocumentFetchKind::PerDocumentId => (1, 0),
DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
};
Self {
timestamp: Some(OffsetDateTime::now_utc()),
user_agents: extract_user_agents(request).into_iter().collect(),
total_received: 1,
per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
max_limit: limit,
max_offset: offset,
}
}
/// Aggregate one [DocumentsFetchAggregator] into another.
pub fn aggregate(&mut self, other: Self) {
if self.timestamp.is_none() {
self.timestamp = other.timestamp;
}
for user_agent in other.user_agents {
self.user_agents.insert(user_agent);
}
self.total_received = self.total_received.saturating_add(other.total_received);
self.per_document_id |= other.per_document_id;
self.per_filter |= other.per_filter;
self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
// if we had no timestamp it means we never encountered any events and
// thus we don't need to send this event.
let timestamp = self.timestamp?;
Some(Track {
timestamp: Some(timestamp),
user: user.clone(),
event: event_name.to_string(),
properties: serde_json::to_value(self).ok()?,
..Default::default()
})
}
}
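A brief hedged sketch of how two fetch events fold into a single aggregate before being flushed to Segment; the `req` value is an assumed `actix_web::HttpRequest` borrowed from a handler.

// Hypothetical aggregation of a filtered browse and a fetch by document id.
let mut agg = DocumentsFetchAggregator::from_query(
    &DocumentFetchKind::Normal { with_filter: true, limit: 20, offset: 0 },
    &req,
);
agg.aggregate(DocumentsFetchAggregator::from_query(&DocumentFetchKind::PerDocumentId, &req));
// Now total_received == 2, per_filter and per_document_id are both true, max_limit == 20.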

View File

@ -1,5 +1,6 @@
use actix_web as aweb;
use aweb::error::{JsonPayloadError, QueryPayloadError};
use byte_unit::Byte;
use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
@ -20,12 +21,14 @@ pub enum MeilisearchHttpError {
InvalidContentType(String, Vec<String>),
#[error("Document `{0}` not found.")]
DocumentNotFound(String),
#[error("Sending an empty filter is forbidden.")]
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("The provided payload reached the size limit.")]
PayloadTooLarge,
#[error("The provided payload reached the size limit. The maximum accepted payload size is {}.", Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
PayloadTooLarge(usize),
#[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)]
@ -58,8 +61,9 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
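For context, a hedged sketch of how the human-readable size in the new `PayloadTooLarge` message is produced with `byte_unit` (imported above); the 10 MiB value matches the limit exercised in the payload-size test snapshot further down.

// Hypothetical: formatting 10 MiB the same way the error message does.
use byte_unit::Byte;
let pretty = Byte::from_bytes(10 * 1024 * 1024).get_appropriate_unit(true);
assert_eq!(pretty.to_string(), "10.00 MiB");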

View File

@ -11,6 +11,7 @@ use crate::error::MeilisearchHttpError;
pub struct Payload {
payload: Decompress<dev::Payload>,
limit: usize,
remaining: usize,
}
pub struct PayloadConfig {
@ -43,6 +44,7 @@ impl FromRequest for Payload {
ready(Ok(Payload {
payload: Decompress::from_headers(payload.take(), req.headers()),
limit,
remaining: limit,
}))
}
}
@ -54,12 +56,14 @@ impl Stream for Payload {
fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
match Pin::new(&mut self.payload).poll_next(cx) {
Poll::Ready(Some(result)) => match result {
Ok(bytes) => match self.limit.checked_sub(bytes.len()) {
Ok(bytes) => match self.remaining.checked_sub(bytes.len()) {
Some(new_limit) => {
self.limit = new_limit;
self.remaining = new_limit;
Poll::Ready(Some(Ok(bytes)))
}
None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))),
None => {
Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge(self.limit))))
}
},
x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
},

View File

@ -232,6 +232,7 @@ fn open_or_create_database_unchecked(
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
max_number_of_tasks: 1_000_000,

View File

@ -29,6 +29,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
anyhow::ensure!(
!(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
"The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
);
setup(&opt)?;
match (opt.env.as_ref(), &opt.master_key) {
@ -181,9 +186,9 @@ Anonymous telemetry:\t\"Enabled\""
}
eprintln!();
eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
eprintln!("Documentation:\t\thttps://www.meilisearch.com/docs");
eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html");
eprintln!("Discord:\t\thttps://discord.meilisearch.com");
eprintln!();
}

View File

@ -48,6 +48,8 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
"MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
const DEFAULT_DB_PATH: &str = "./data.ms";
@ -293,6 +295,11 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_metrics: bool,
/// Experimental RAM reduction during indexing; do not use in production. See: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
pub experimental_reduce_indexing_memory_usage: bool,
#[serde(flatten)]
#[clap(flatten)]
pub indexer_options: IndexerOpts,
@ -385,6 +392,7 @@ impl Opt {
#[cfg(all(not(debug_assertions), feature = "analytics"))]
no_analytics,
experimental_enable_metrics: enable_metrics_route,
experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -426,6 +434,10 @@ impl Opt {
MEILI_EXPERIMENTAL_ENABLE_METRICS,
enable_metrics_route.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
reduce_indexing_memory_usage.to_string(),
);
indexer_options.export_to_env();
}

View File

@ -4,19 +4,20 @@ use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use deserr::actix_web::AwebQueryParameter;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::{milli, Document, Index};
@ -28,7 +29,7 @@ use tempfile::tempfile;
use tokio::fs::File;
use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};
use crate::analytics::{Analytics, DocumentDeletionKind};
use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
use crate::error::MeilisearchHttpError;
use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
@ -36,6 +37,7 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::search::parse_filter;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@ -66,13 +68,17 @@ pub struct DocumentParam {
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::get().to(SeqHandler(get_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
// this route needs to be before the /documents/{document_id} to match properly
.service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
// these routes need to be before the /documents/{document_id} to match properly
.service(
web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
)
.service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
.service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
.service(
web::resource("/{document_id}")
.route(web::get().to(SeqHandler(get_document)))
@ -91,10 +97,14 @@ pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
document_param: web::Path<DocumentParam>,
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
let GetDocument { fields } = params.into_inner();
let attributes_to_retrieve = fields.merge_star_and_none();
@ -127,29 +137,103 @@ pub async fn delete_document(
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQuery {
pub struct BrowseQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
offset: Param<usize>,
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
limit: Param<usize>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
fields: OptionStarOrList<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
filter: Option<String>,
}
pub async fn get_all_documents(
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQuery {
#[deserr(default, error = DeserrJsonError<InvalidDocumentOffset>)]
offset: usize,
#[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError<InvalidDocumentLimit>)]
limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
fields: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
filter: Option<Value>,
}
pub async fn documents_by_query_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
body: AwebJson<BrowseQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with body: {:?}", body);
let body = body.into_inner();
analytics.post_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: body.filter.is_some(),
limit: body.limit,
offset: body.offset,
},
&req,
);
documents_by_query(&index_scheduler, index_uid, body)
}
pub async fn get_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", params);
let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
let filter = match filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
let query = BrowseQuery {
offset: offset.0,
limit: limit.0,
fields: fields.merge_star_and_none(),
filter,
};
analytics.get_fetch_documents(
&DocumentFetchKind::Normal {
with_filter: query.filter.is_some(),
limit: query.limit,
offset: query.offset,
},
&req,
);
documents_by_query(&index_scheduler, index_uid, query)
}
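A hedged note on the normalization above: the GET route receives `filter` as a raw string, so it is first tried as JSON and only kept as a plain string when parsing fails, letting both syntaxes reach the same code path as the POST body.

// Hypothetical: the two shapes the GET `filter` parameter can take.
let raw = "color = blue"; // not valid JSON -> wrapped as Value::String
assert!(serde_json::from_str::<serde_json::Value>(raw).is_err());
let array = r#"["color = blue", "id > 2"]"#; // valid JSON -> parsed as an array filter
assert!(serde_json::from_str::<serde_json::Value>(array).is_ok());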
fn documents_by_query(
index_scheduler: &IndexScheduler,
index_uid: web::Path<String>,
query: BrowseQuery,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let BrowseQuery { limit, offset, fields } = params.into_inner();
let attributes_to_retrieve = fields.merge_star_and_none();
let BrowseQuery { offset, limit, fields, filter } = query;
let index = index_scheduler.index(&index_uid)?;
let (total, documents) = retrieve_documents(&index, offset.0, limit.0, attributes_to_retrieve)?;
let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
let ret = PaginationView::new(offset.0, limit.0, total as usize, documents);
let ret = PaginationView::new(offset, limit, total as usize, documents);
debug!("returns: {:?}", ret);
Ok(HttpResponse::Ok().json(ret))
@ -373,7 +457,7 @@ async fn document_addition(
Ok(task.into())
}
pub async fn delete_documents(
pub async fn delete_documents_batch(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: web::Json<Vec<Value>>,
@ -399,6 +483,42 @@ pub async fn delete_documents(
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct DocumentDeletionByFilter {
#[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
filter: Value,
}
pub async fn delete_documents_by_filter(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!("called with params: {:?}", body);
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let index_uid = index_uid.into_inner();
let filter = body.into_inner().filter;
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
// we ensure the filter is well formed before enqueuing it
|| -> Result<_, ResponseError> {
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever the error was, the error code should always be InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
@ -416,14 +536,15 @@ pub async fn clear_all_documents(
Ok(HttpResponse::Accepted().json(task))
}
fn all_documents<'a>(
index: &Index,
rtxn: &'a RoTxn,
fn some_documents<'a, 't: 'a>(
index: &'a Index,
rtxn: &'t RoTxn,
doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
Ok(index.all_documents(rtxn)?.map(move |ret| {
Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
})
@ -434,24 +555,50 @@ fn retrieve_documents<S: AsRef<str>>(
index: &Index,
offset: usize,
limit: usize,
filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
let filter = if let Some(filter) = filter {
parse_filter(filter)
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
} else {
None
};
let mut documents = Vec::new();
for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
let document = match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
};
documents.push(document);
}
let candidates = if let Some(filter) = filter {
filter.evaluate(&rtxn, index).map_err(|err| match err {
milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
}
e => e.into(),
})?
} else {
index.documents_ids(&rtxn)?
};
let number_of_documents = index.number_of_documents(&rtxn)?;
Ok((number_of_documents, documents))
let (it, number_of_documents) = {
let number_of_documents = candidates.len();
(
some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
number_of_documents,
)
};
let documents: Result<Vec<_>, ResponseError> = it
.map(|document| {
Ok(match &attributes_to_retrieve {
Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
&document?,
attributes_to_retrieve.iter().map(|s| s.as_ref()),
),
None => document?,
})
})
.collect();
Ok((number_of_documents, documents?))
}
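For illustration (not in the diff), a minimal call of the reworked helper above; the concrete filter and field list are assumptions.

// Hypothetical: page through documents matching a filter, keeping two fields.
let (total, documents) = retrieve_documents(
    &index,
    0,                                       // offset
    20,                                      // limit
    Some(serde_json::json!("color = blue")), // same shape as the route's `filter`
    Some(vec!["id", "color"]),               // attributes_to_retrieve
)?;
// `total` counts every document matching the filter, not just the returned page.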
fn retrieve_document<S: AsRef<str>>(

View File

@ -99,7 +99,7 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<String>,
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
@ -131,8 +131,17 @@ impl From<Details> for DetailsView {
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
@ -140,7 +149,7 @@ impl From<Details> for DetailsView {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(original_filter),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
@ -148,7 +157,7 @@ impl From<Details> for DetailsView {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(original_filter),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
@ -721,7 +730,7 @@ mod tests {
let err = deserr_query_params::<TaskDeletionOrCancelationQuery>(params).unwrap_err();
snapshot!(meili_snap::json_string!(err), @r###"
{
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `createIndex` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@ -745,7 +745,7 @@ fn format_value<A: AsRef<[u8]>>(
}
}
fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;

View File

@ -16,8 +16,11 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
("GET", "/indexes/products/search") => hashset!{"search", "*"},
("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"},
("GET", "/indexes/products/documents") => hashset!{"documents.get", "documents.*", "*"},
("POST", "/indexes/products/documents/fetch") => hashset!{"documents.get", "documents.*", "*"},
("GET", "/indexes/products/documents/0") => hashset!{"documents.get", "documents.*", "*"},
("DELETE", "/indexes/products/documents/0") => hashset!{"documents.delete", "documents.*", "*"},
("POST", "/indexes/products/documents/delete-batch") => hashset!{"documents.delete", "documents.*", "*"},
("POST", "/indexes/products/documents/delete") => hashset!{"documents.delete", "documents.*", "*"},
("GET", "/tasks") => hashset!{"tasks.get", "tasks.*", "*"},
("DELETE", "/tasks") => hashset!{"tasks.delete", "tasks.*", "*"},
("GET", "/tasks?indexUid=products") => hashset!{"tasks.get", "tasks.*", "*"},

View File

@ -198,6 +198,11 @@ impl Index<'_> {
self.service.get(url).await
}
pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
self.service.post(url, payload).await
}
pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
self.service.get(url).await
@ -225,6 +230,11 @@ impl Index<'_> {
self.service.delete(url).await
}
pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref()));
self.service.post_encoded(url, body, self.encoder).await
}
pub async fn clear_all_documents(&self) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
self.service.delete(url).await

View File

@ -1781,7 +1781,7 @@ async fn error_add_documents_payload_size() {
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"message": "The provided payload reached the size limit.",
"message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
"code": "payload_too_large",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#payload_too_large"

View File

@ -1,3 +1,4 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use crate::common::{GetAllDocumentsOptions, Server};
@ -135,3 +136,254 @@ async fn delete_no_document_batch() {
assert_eq!(code, 200);
assert_eq!(response["results"].as_array().unwrap().len(), 3);
}
#[actix_rt::test]
async fn delete_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 2,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(2).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 2,
"originalFilter": "\"color = blue\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "color NOT EXISTS"})).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"taskUid": 3,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(3).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 1,
"originalFilter": "\"color NOT EXISTS\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}
#[actix_rt::test]
async fn delete_document_by_complex_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3, "color": "green" },
{ "id": 4 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) = index
.delete_document_by_filter(
json!({ "filter": ["color != red", "color != green", "color EXISTS"] }),
)
.await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 2,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(2).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 2,
"originalFilter": "[\"color != red\",\"color != green\",\"color EXISTS\"]"
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 3,
"color": "green"
},
{
"id": 4
}
],
"offset": 0,
"limit": 20,
"total": 3
}
"###);
let (response, code) = index
.delete_document_by_filter(json!({ "filter": [["color = green", "color NOT EXISTS"]] }))
.await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"taskUid": 3,
"indexUid": "doggo",
"status": "enqueued",
"type": "documentDeletion",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(3).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 4,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"color": "red"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}

View File

@ -82,6 +82,111 @@ async fn get_all_documents_bad_limit() {
"###);
}
#[actix_rt::test]
async fn get_all_documents_bad_filter() {
let server = Server::new().await;
let index = server.index("test");
// Since the filter can't be parsed automatically by deserr, we get the wrong error message
// when the index does not exist: one could expect an error about the invalid filter before
// the index's existence is checked, but that is not what happens.
let (response, code) = index.get_all_documents_raw("?filter").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "test",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let response = server.wait_task(0).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "indexCreation",
"canceledBy": null,
"details": {
"primaryKey": null
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": [],
"offset": 0,
"limit": 20,
"total": 0
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo=bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
}
#[actix_rt::test]
async fn delete_documents_batch() {
let server = Server::new().await;
@ -418,3 +523,264 @@ async fn update_documents_csv_delimiter_with_bad_content_type() {
}
"###);
}
#[actix_rt::test]
async fn delete_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
// send a bad payload type
let (response, code) = index.delete_document_by_filter(json!("hello")).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found a string: `\"hello\"`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// send a bad filter type
let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// send an invalid filter expression
let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// send empty filter
let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Sending an empty filter is forbidden.",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
// do not send any filter
let (response, code) = index.delete_document_by_filter(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Missing field `filter`",
"code": "missing_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_document_filter"
}
"###);
// the index does not exist
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
{
"uid": 0,
"indexUid": "doggo",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `doggo` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
// no filterable attributes are set
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await;
snapshot!(code, @"202 Accepted");
server.wait_task(response["taskUid"].as_u64().unwrap()).await;
// the attribute is not filterable even though the index has a filterable attribute
let (response, code) =
index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
{
"uid": 4,
"indexUid": "doggo",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"catto = jorts\""
},
"error": {
"message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn fetch_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found null",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_offset",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_offset"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_limit",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_limit"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_document_fields",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_fields"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
}
"###);
}

View File

@ -1,5 +1,6 @@
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::*;
use serde_json::{json, Value};
use urlencoding::encode as urlencode;
@ -378,3 +379,164 @@ async fn get_documents_displayed_attributes_is_ignored() {
assert_eq!(response.as_object().unwrap().keys().count(), 16);
assert!(response.as_object().unwrap().get("gender").is_some());
}
#[actix_rt::test]
async fn get_document_by_filter() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings_filterable_attributes(json!(["color"])).await;
index
.add_documents(
json!([
{ "id": 0, "color": "red" },
{ "id": 1, "color": "blue" },
{ "id": 2, "color": "blue" },
{ "id": 3 },
]),
Some("id"),
)
.await;
index.wait_task(1).await;
let (response, code) = index.get_document_by_filter(json!({})).await;
let (response2, code2) = index.get_all_documents_raw("").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
},
{
"id": 3
}
],
"offset": 0,
"limit": 20,
"total": 4
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
.await;
let (response2, code2) =
index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 3
}
],
"offset": 1,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
let (response, code) = index
.get_document_by_filter(
json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
)
.await;
let (response2, code2) =
index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"color": "red"
}
],
"offset": 0,
"limit": 1,
"total": 2
}
"###);
assert_eq!(code, code2);
assert_eq!(response, response2);
// Now test more complex filters that the GET route can't represent
let (response, code) =
index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
},
{
"id": 1,
"color": "blue"
},
{
"id": 2,
"color": "blue"
}
],
"offset": 0,
"limit": 20,
"total": 3
}
"###);
let (response, code) = index
.get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"results": [
{
"id": 0,
"color": "red"
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}

View File

@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() {
index.wait_task(1).await;
let expected_response = json!({
"message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.",
"message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
"code": "invalid_search_sort",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_sort"

View File

@ -97,7 +97,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -108,7 +108,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"
@ -119,7 +119,7 @@ async fn task_bad_types() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"message": "Invalid value in parameter `types`: `doggo` is not a valid task type. Available types are `documentAdditionOrUpdate`, `documentDeletion`, `documentDeletionByFilter`, `settingsUpdate`, `indexCreation`, `indexDeletion`, `indexUpdate`, `indexSwap`, `taskCancelation`, `taskDeletion`, `dumpCreation`, `snapshotCreation`.",
"code": "invalid_task_types",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_types"

View File

@ -413,7 +413,7 @@ async fn test_summarized_document_addition_or_update() {
}
#[actix_web::test]
async fn test_summarized_delete_batch() {
async fn test_summarized_delete_documents_by_batch() {
let server = Server::new().await;
let index = server.index("test");
index.delete_batch(vec![1, 2, 3]).await;
@ -430,7 +430,8 @@ async fn test_summarized_delete_batch() {
"canceledBy": null,
"details": {
"providedIds": 3,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": {
"message": "Index `test` not found.",
@ -460,7 +461,8 @@ async fn test_summarized_delete_batch() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": null,
"duration": "[duration]",
@ -472,7 +474,100 @@ async fn test_summarized_delete_batch() {
}
#[actix_web::test]
async fn test_summarized_delete_document() {
async fn test_summarized_delete_documents_by_filter() {
let server = Server::new().await;
let index = server.index("test");
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(0).await;
let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
index.create(None).await;
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 2,
"indexUid": "test",
"status": "failed",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": {
"message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await;
index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
index.wait_task(4).await;
let (task, _) = index.get_task(4).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
@r###"
{
"uid": 4,
"indexUid": "test",
"status": "succeeded",
"type": "documentDeletion",
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
"originalFilter": "\"doggo = bernese\""
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_web::test]
async fn test_summarized_delete_document_by_id() {
let server = Server::new().await;
let index = server.index("test");
index.delete_document(1).await;
@ -489,7 +584,8 @@ async fn test_summarized_delete_document() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": {
"message": "Index `test` not found.",
@ -519,7 +615,8 @@ async fn test_summarized_delete_document() {
"canceledBy": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
"deletedDocuments": 0,
"originalFilter": null
},
"error": null,
"duration": "[duration]",

View File

@ -25,8 +25,13 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
grenad = { version = "0.4.4", default-features = false, features = [
"tempfile",
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-features = false, features = [
"lmdb",
"sync-read-txn",
] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memmap2 = "0.5.10"
@ -39,12 +44,17 @@ rstar = { version = "0.10.0", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
slice-group-by = "0.3.0"
smallstr = { version = "0.3.0", features = ["serde"] }
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.10.0"
smartstring = "1.0.1"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
time = { version = "0.3.20", features = [
"serde-well-known",
"formatting",
"parsing",
"macros",
] }
uuid = { version = "1.3.1", features = ["v4"] }
filter-parser = { path = "../filter-parser" }
@ -63,13 +73,13 @@ big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
rand = {version = "0.8.5", features = ["small_rng"] }
rand = { version = "0.8.5", features = ["small_rng"] }
[target.'cfg(fuzzing)'.dev-dependencies]
fuzzcheck = "0.12.1"
[features]
default = [ "charabia/default" ]
all-tokenizations = ["charabia/default"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml

View File

@ -112,6 +112,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidGeoField(#[from] GeoError),
#[error("{0}")]
InvalidFilter(String),
#[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}",
.field,
match .valid_fields.is_empty() {
@ -124,7 +126,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
#[error("{}", HeedError::BadOpenOptions)]
InvalidLmdbOpenOptions,
#[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
#[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
SortRankingRuleMissing,
#[error("The database file is in an invalid state.")]
InvalidStoreFile,

View File

@ -170,33 +170,46 @@ impl Index {
unsafe { options.flag(Flags::MdbAlwaysFreePages) };
let env = options.open(path)?;
let main = env.create_poly_database(Some(MAIN))?;
let word_docids = env.create_database(Some(WORD_DOCIDS))?;
let exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?;
let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
let mut wtxn = env.write_txn()?;
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
let exact_word_prefix_docids =
env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
let docid_word_positions = env.create_database(&mut wtxn, Some(DOCID_WORD_POSITIONS))?;
let word_pair_proximity_docids =
env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
let script_language_docids =
env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
let word_prefix_pair_proximity_docids =
env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
env.create_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
let prefix_word_pair_proximity_docids =
env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
env.create_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
let field_id_word_count_docids =
env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
let word_prefix_position_docids =
env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
let word_prefix_fid_docids =
env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
let facet_id_string_docids =
env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
let facet_id_exists_docids =
env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
let facet_id_is_null_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
let facet_id_is_empty_docids =
env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_f64s =
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
let field_id_docid_facet_strings =
env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?;
let documents = env.create_database(Some(DOCUMENTS))?;
env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
wtxn.commit()?;
Index::set_creation_dates(&env, main, created_at, updated_at)?;
@ -1032,16 +1045,15 @@ impl Index {
/* documents */
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>(
&self,
/// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
pub fn iter_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
let mut documents = Vec::new();
for id in ids {
Ok(ids.into_iter().map(move |id| {
if soft_deleted_documents.contains(id) {
return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?;
}
@ -1049,27 +1061,25 @@ impl Index {
.documents
.get(rtxn, &BEU32::new(id))?
.ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
documents.push((id, kv));
}
Ok((id, kv))
}))
}
Ok(documents)
/// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
pub fn documents<'t>(
&self,
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId>,
) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
self.iter_documents(rtxn, ids)?.collect()
}
/// Returns an iterator over all the documents in the index.
pub fn all_documents<'t>(
&self,
pub fn all_documents<'a, 't: 'a>(
&'a self,
rtxn: &'t RoTxn,
) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReaderU16<'t>)>>> {
let soft_deleted_docids = self.soft_deleted_documents_ids(rtxn)?;
Ok(self
.documents
.iter(rtxn)?
// we cast the BEU32 to a DocumentId
.map(|document| document.map(|(id, obkv)| (id.get(), obkv)))
.filter(move |document| {
document.as_ref().map_or(true, |(id, _)| !soft_deleted_docids.contains(*id))
}))
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
self.iter_documents(rtxn, self.documents_ids(rtxn)?)
}
pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
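A sketch (not part of the diff) of how the two accessors now differ: `documents` still collects into a `Vec` and fails on the first problematic id, while the new `iter_documents` yields one `Result` per requested id so callers can stream without the intermediate allocation. The `milli::Index`, `milli::Result`, and `heed::RoTxn` paths are assumed re-exports; the snippet is only illustrative:

    fn dump_ids(index: &milli::Index, rtxn: &heed::RoTxn) -> milli::Result<()> {
        // Eager: one Vec, and the whole call fails if any id is missing or soft-deleted.
        let eager = index.documents(rtxn, vec![0, 1, 2])?;
        println!("fetched {} documents at once", eager.len());

        // Lazy: each item is its own Result<(DocumentId, KvReaderU16)>.
        for entry in index.iter_documents(rtxn, vec![0, 1, 2])? {
            let (docid, _obkv) = entry?;
            println!("streamed document {docid}");
        }
        Ok(())
    }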

View File

@ -5,6 +5,7 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
use roaring::RoaringBitmap;
use serde_json::Value;
use super::facet_range_search;
use crate::error::{Error, UserError};
@ -112,6 +113,52 @@ impl<'a> From<Filter<'a>> for FilterCondition<'a> {
}
impl<'a> Filter<'a> {
pub fn from_json(facets: &'a Value) -> Result<Option<Self>> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;
Ok(condition)
}
Value::Array(arr) => Self::parse_filter_array(arr),
v => Err(Error::UserError(UserError::InvalidFilterExpression(
&["String", "Array"],
v.clone(),
))),
}
}
fn parse_filter_array(arr: &'a [Value]) -> Result<Option<Self>> {
let mut ands = Vec::new();
for value in arr {
match value {
Value::String(s) => ands.push(Either::Right(s.as_str())),
Value::Array(arr) => {
let mut ors = Vec::new();
for value in arr {
match value {
Value::String(s) => ors.push(s.as_str()),
v => {
return Err(Error::UserError(UserError::InvalidFilterExpression(
&["String"],
v.clone(),
)))
}
}
}
ands.push(Either::Left(ors));
}
v => {
return Err(Error::UserError(UserError::InvalidFilterExpression(
&["String", "[String]"],
v.clone(),
)))
}
}
}
Filter::from_array(ands)
}
pub fn from_array<I, J>(array: I) -> Result<Option<Self>>
where
I: IntoIterator<Item = Either<J, &'a str>>,

View File

@ -201,12 +201,14 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
#[cfg(test)]
mod test {
#[allow(unused_imports)]
use super::*;
use crate::index::tests::TempIndex;
#[cfg(feature = "default")]
#[cfg(feature = "japanese")]
#[test]
fn test_kanji_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new();
index

View File

@ -116,16 +116,15 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
while valid_docids.len() < length {
// The universe for this bucket is zero or one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 {
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
// The universe for this bucket is empty, so we don't need to sort
// anything, just go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty() {
back!();
continue;
}
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(ctx, logger, &ranking_rule_universes[cur_ranking_rule_index])? else {
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(ctx, logger, &ranking_rule_universes[cur_ranking_rule_index])?
else {
back!();
continue;
};

View File

@ -46,7 +46,7 @@ use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::ranking_rule_graph::{
ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph,
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
@ -54,6 +54,12 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
}
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
@ -175,9 +181,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
// If universe.len() <= 1, the bucket sort algorithm
// should not have called this function.
assert!(universe.len() > 1);
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`,
// should never happen
let mut state = self.state.take().unwrap();

View File

@ -4,7 +4,6 @@ use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use std::time::Instant;
// use rand::random;
use roaring::RoaringBitmap;
use crate::search::new::interner::Interned;
@ -13,6 +12,7 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::{
Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition,
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
@ -24,11 +24,12 @@ pub enum SearchEvents {
RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
ExtendResults { new: Vec<u32> },
WordsGraph { query_graph: QueryGraph },
ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
TypoGraph { graph: RankingRuleGraph<TypoGraph> },
TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
WordsGraph { graph: RankingRuleGraph<WordsGraph> },
WordsPaths { paths: Vec<Vec<Interned<WordsCondition>>> },
FidGraph { graph: RankingRuleGraph<FidGraph> },
FidPaths { paths: Vec<Vec<Interned<FidCondition>>> },
PositionGraph { graph: RankingRuleGraph<PositionGraph> },
@ -139,8 +140,11 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
let Some(location) = self.location.last() else { return };
match location {
Location::Words => {
if let Some(query_graph) = state.downcast_ref::<QueryGraph>() {
self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() });
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<WordsGraph>>() {
self.events.push(SearchEvents::WordsGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<WordsCondition>>>>() {
self.events.push(SearchEvents::WordsPaths { paths: paths.clone() });
}
}
Location::Typo => {
@ -329,7 +333,6 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
SearchEvents::ExtendResults { new } => {
self.write_extend_results(new)?;
}
SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?,
SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::ProximityPaths { paths } => {
self.write_rr_graph_paths::<ProximityGraph>(paths)?;
@ -338,6 +341,10 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
SearchEvents::TypoPaths { paths } => {
self.write_rr_graph_paths::<TypoGraph>(paths)?;
}
SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::WordsPaths { paths } => {
self.write_rr_graph_paths::<WordsGraph>(paths)?;
}
SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::FidPaths { paths } => {
self.write_rr_graph_paths::<FidGraph>(paths)?;
@ -455,7 +462,7 @@ fill: \"#B6E2D3\"
shape: class
max_nbr_typo: {}",
term_subset.description(ctx),
term_subset.max_nbr_typos(ctx)
term_subset.max_typo_cost(ctx)
)?;
for w in term_subset.all_single_words_except_prefix_db(ctx)? {
@ -482,13 +489,6 @@ fill: \"#B6E2D3\"
}
Ok(())
}
fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> {
self.make_new_file_for_internal_state_if_needed()?;
self.write_query_graph(&qg)?;
Ok(())
}
fn write_rr_graph<R: RankingRuleGraphTrait>(
&mut self,
graph: &RankingRuleGraph<R>,

View File

@ -52,7 +52,7 @@ impl MatchingWords {
words.push(LocatedMatchingWords {
value: matching_words,
positions: located_term.positions.clone(),
is_prefix: term.is_cached_prefix(),
is_prefix: term.is_prefix(),
original_char_count: term.original_word(&ctx).chars().count(),
});
}
@ -244,6 +244,8 @@ pub(crate) mod tests {
temp_index
.add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
]))
.unwrap();
temp_index
@ -305,7 +307,7 @@ pub(crate) mod tests {
..Default::default()
})
.next(),
None
Some(MatchType::Full { char_len: 5, ids: &(2..=2) })
);
assert_eq!(
matching_words

View File

@ -499,17 +499,36 @@ mod tests {
use charabia::TokenizerBuilder;
use matching_words::tests::temp_index_with_documents;
use super::super::located_query_terms_from_tokens;
use super::*;
use crate::SearchContext;
use crate::index::tests::TempIndex;
use crate::{execute_search, SearchContext};
impl<'a> MatcherBuilder<'a, &[u8]> {
pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self {
let tokenizer = TokenizerBuilder::new().build();
let tokens = tokenizer.tokenize(query);
let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
let matching_words = MatchingWords::new(ctx, query_terms);
Self::new(matching_words, TokenizerBuilder::new().build())
fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
let mut ctx = SearchContext::new(index, rtxn);
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
&Some(query.to_string()),
crate::TermsMatchingStrategy::default(),
false,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,
Some(10),
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
)
.unwrap();
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
None => MatchingWords::default(),
};
MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
}
}
@ -517,8 +536,7 @@ mod tests {
fn format_identity() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: false, crop: None };
@ -545,8 +563,7 @@ mod tests {
fn format_highlight() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: true, crop: None };
@ -589,8 +606,7 @@ mod tests {
fn highlight_unicode() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "world");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing prefix match.
@ -599,7 +615,7 @@ mod tests {
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Ŵôřlḑôle</em>"
@"<em>Ŵôřlḑ</em>ôle"
);
// Text containing unicode match.
@ -611,8 +627,7 @@ mod tests {
@"<em>Ŵôřlḑ</em>"
);
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "westfali");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing unicode match.
@ -621,7 +636,7 @@ mod tests {
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Westfália</em>"
@"<em>Westfáli</em>a"
);
}
@ -629,8 +644,7 @@ mod tests {
fn format_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: false, crop: Some(10) };
@ -727,8 +741,7 @@ mod tests {
fn format_highlight_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: true, crop: Some(10) };
@ -790,8 +803,7 @@ mod tests {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let builder = MatcherBuilder::new_test(ctx, "split the world");
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let text = "void void split the world void void.";
@ -827,8 +839,8 @@ mod tests {
fn partial_matches() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let ctx = SearchContext::new(&temp_index, &rtxn);
let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\"");
let mut builder =
MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
builder.highlight_prefix("_".to_string());
builder.highlight_suffix("_".to_string());

View File

@ -15,11 +15,7 @@ mod resolve_query_graph;
mod small_bitmap;
mod exact_attribute;
// TODO: documentation + comments
// implementation is currently an adaptation of the previous implementation to fit with the new model
mod sort;
// TODO: documentation + comments
mod words;
#[cfg(test)]
mod tests;
@ -43,10 +39,10 @@ use ranking_rules::{
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
use roaring::RoaringBitmap;
use sort::Sort;
use words::Words;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use crate::search::new::distinct::apply_distinct_rule;
use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@ -202,6 +198,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
// Don't add the `words` ranking rule if the term matching strategy is `All`
if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
@ -397,8 +398,8 @@ pub fn execute_search(
None
};
let bucket_sort_output = if let Some(query_terms) = query_terms {
let graph = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(query_terms);
let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(new_located_query_terms);
let ranking_rules = get_ranking_rules_for_query_graph_search(
ctx,

View File

@ -88,12 +88,15 @@ pub struct QueryGraph {
}
impl QueryGraph {
/// Build the query graph from the parsed user search query.
/// Build the query graph from the parsed user search query, return an updated list of the located query terms
/// which contains ngrams.
pub fn from_query(
ctx: &mut SearchContext,
// NOTE: the terms here must be consecutive
terms: &[LocatedQueryTerm],
) -> Result<QueryGraph> {
) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
let mut new_located_query_terms = terms.to_vec();
let nbr_typos = number_of_typos_allowed(ctx)?;
let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
@ -107,10 +110,11 @@ impl QueryGraph {
let original_terms_len = terms.len();
for term_idx in 0..original_terms_len {
let mut new_nodes = vec![];
let new_node_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)),
term_subset: QueryTermSubset::full(terms[term_idx].value),
positions: terms[term_idx].positions.clone(),
term_ids: term_idx as u8..=term_idx as u8,
}),
@ -121,6 +125,7 @@ impl QueryGraph {
if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
{
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
@ -136,6 +141,7 @@ impl QueryGraph {
if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
{
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
@ -167,7 +173,7 @@ impl QueryGraph {
let mut graph = QueryGraph { root_node, end_node, nodes };
graph.build_initial_edges();
Ok(graph)
Ok((graph, new_located_query_terms))
}
/// Remove the given nodes, connecting all their predecessors to all their successors.

View File

@ -28,16 +28,14 @@ pub enum ZeroOrOneTypo {
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
if s.max_nbr_typos == 0 {
s.one_typo = Lazy::Init(OneTypoTerm::default());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_nbr_typos == 1 && s.one_typo.is_uninit() {
if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
// Initialize the one_typo subterm even if the allowed Levenshtein distance is 0, because of split words
self.initialize_one_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
assert!(s.one_typo.is_init());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() {
} else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
self.initialize_one_and_two_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
@ -187,7 +185,7 @@ pub fn partially_initialized_term_from_word(
original: ctx.word_interner.insert(word.to_owned()),
ngram_words: None,
is_prefix: false,
max_nbr_typos: 0,
max_levenshtein_distance: 0,
zero_typo: <_>::default(),
one_typo: Lazy::Init(<_>::default()),
two_typo: Lazy::Init(<_>::default()),
@ -258,7 +256,7 @@ pub fn partially_initialized_term_from_word(
Ok(QueryTerm {
original: word_interned,
ngram_words: None,
max_nbr_typos: max_typo,
max_levenshtein_distance: max_typo,
is_prefix,
zero_typo,
one_typo: Lazy::Uninit,
@ -277,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern
impl Interned<QueryTerm> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm { original, is_prefix, one_typo, .. } = self_mut;
let allows_split_words = self_mut.allows_split_words();
let QueryTerm {
original,
is_prefix,
one_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let original = *original;
let is_prefix = *is_prefix;
// let original_str = ctx.word_interner.get(*original).to_owned();
@ -286,26 +293,33 @@ impl Interned<QueryTerm> {
}
let mut one_typo_words = BTreeSet::new();
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
if *max_nbr_typos > 0 {
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
}
}
Ok(ControlFlow::Continue(()))
})?;
let original_str = ctx.word_interner.get(original).to_owned();
let split_words = find_split_words(ctx, original_str.as_str())?;
Ok(ControlFlow::Continue(()))
})?;
}
let split_words = if allows_split_words {
let original_str = ctx.word_interner.get(original).to_owned();
find_split_words(ctx, original_str.as_str())?
} else {
None
};
let self_mut = ctx.term_interner.get_mut(self);
// Only add the split words to the derivations if:
// 1. the term is neither an ngram nor a phrase; OR
// 2. the term is an ngram, but the split words are different from the ngram's component words
let split_words = if let Some((ngram_words, split_words)) =
self_mut.ngram_words.as_ref().zip(split_words.as_ref())
@ -327,7 +341,13 @@ impl Interned<QueryTerm> {
}
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm { original, is_prefix, two_typo, .. } = self_mut;
let QueryTerm {
original,
is_prefix,
two_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let original_str = ctx.word_interner.get(*original).to_owned();
if two_typo.is_init() {
return Ok(());
@ -335,34 +355,37 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new();
let mut two_typo_words = BTreeSet::new();
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
if *max_nbr_typos > 0 {
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
}
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
}
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
}
}
Ok(ControlFlow::Continue(()))
},
)?;
Ok(ControlFlow::Continue(()))
},
)?;
}
let split_words = find_split_words(ctx, original_str.as_str())?;
let self_mut = ctx.term_interner.get_mut(self);
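Taken together, the two initializers above implement a simple policy: typo derivations are only searched when `max_levenshtein_distance` is non-zero, while split words remain available for any term that is not a phrase. A standalone sketch of that policy with a toy type (not the crate's `QueryTerm`):

```rust
/// Toy stand-in for the relevant `QueryTerm` fields.
struct TermPolicy {
    max_levenshtein_distance: u8,
    is_phrase: bool,
}

impl TermPolicy {
    /// One- and two-typo derivations are only searched when typos are allowed at all.
    fn computes_typo_derivations(&self) -> bool {
        self.max_levenshtein_distance > 0
    }
    /// Split words are still considered at distance 0, as long as the term is not a phrase.
    fn allows_split_words(&self) -> bool {
        !self.is_phrase
    }
}

fn main() {
    let exact_word = TermPolicy { max_levenshtein_distance: 0, is_phrase: false };
    assert!(!exact_word.computes_typo_derivations());
    assert!(exact_word.allows_split_words());
}
```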


@ -43,7 +43,7 @@ pub struct QueryTermSubset {
pub struct QueryTerm {
original: Interned<String>,
ngram_words: Option<Vec<Interned<String>>>,
max_nbr_typos: u8,
max_levenshtein_distance: u8,
is_prefix: bool,
zero_typo: ZeroTypoTerm,
// May not be computed yet
@ -342,10 +342,16 @@ impl QueryTermSubset {
}
None
}
pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 {
pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
let t = ctx.term_interner.get(self.original);
match t.max_nbr_typos {
0 => 0,
match t.max_levenshtein_distance {
0 => {
if t.allows_split_words() {
1
} else {
0
}
}
1 => {
if self.one_typo_subset.is_empty() {
0
@ -438,6 +444,9 @@ impl QueryTerm {
self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
}
fn allows_split_words(&self) -> bool {
self.zero_typo.phrase.is_none()
}
}
impl Interned<QueryTerm> {
@ -470,6 +479,9 @@ impl QueryTerm {
pub fn is_cached_prefix(&self) -> bool {
self.zero_typo.use_prefix_db.is_some()
}
pub fn is_prefix(&self) -> bool {
self.is_prefix
}
pub fn original_word(&self, ctx: &SearchContext) -> String {
ctx.word_interner.get(self.original).clone()
}
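The rename to `max_typo_cost` above reflects a behaviour change: a term whose allowed Levenshtein distance is 0 can still cost one typo when it is matchable as split words. A simplified standalone sketch of the mapping, ignoring the `one_typo_subset`/`two_typo_subset` emptiness checks of the real method:

```rust
// Simplified model of `max_typo_cost`: split words make an otherwise exact term
// cost 1, so the `typo` ranking rule places the split-word match in a later bucket.
fn max_typo_cost(max_levenshtein_distance: u8, allows_split_words: bool) -> u8 {
    match max_levenshtein_distance {
        0 if allows_split_words => 1,
        0 => 0,
        d => d,
    }
}

fn main() {
    assert_eq!(max_typo_cost(0, true), 1);  // e.g. "sunflower" matched as "sun flower"
    assert_eq!(max_typo_cost(0, false), 0); // phrases never use split words
    assert_eq!(max_typo_cost(2, true), 2);
}
```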


@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens(
}
}
TokenKind::Separator(separator_kind) => {
match separator_kind {
SeparatorKind::Hard => {
position += 1;
}
SeparatorKind::Soft => {
position += 0;
}
// add penalty for hard separators
if let SeparatorKind::Hard = separator_kind {
position = position.wrapping_add(1);
}
phrase = 'phrase: {
@ -217,7 +213,7 @@ pub fn make_ngram(
original: ngram_str_interned,
ngram_words: Some(words_interned),
is_prefix,
max_nbr_typos,
max_levenshtein_distance: max_nbr_typos,
zero_typo: term.zero_typo,
one_typo: Lazy::Uninit,
two_typo: Lazy::Uninit,
@ -271,7 +267,7 @@ impl PhraseBuilder {
QueryTerm {
original: ctx.word_interner.insert(phrase_desc),
ngram_words: None,
max_nbr_typos: 0,
max_levenshtein_distance: 0,
is_prefix: false,
zero_typo: ZeroTypoTerm {
phrase: Some(phrase),
@ -288,3 +284,36 @@ impl PhraseBuilder {
})
}
}
#[cfg(test)]
mod tests {
use charabia::TokenizerBuilder;
use super::*;
use crate::index::tests::TempIndex;
fn temp_index_with_documents() -> TempIndex {
let temp_index = TempIndex::new();
temp_index
.add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
]))
.unwrap();
temp_index
}
#[test]
fn start_with_hard_separator() -> Result<()> {
let tokenizer = TokenizerBuilder::new().build();
let tokens = tokenizer.tokenize(".");
let index = temp_index_with_documents();
let rtxn = index.read_txn()?;
let mut ctx = SearchContext::new(&index, &rtxn);
// Panicked with `attempt to add with overflow` before the fix for <https://github.com/meilisearch/meilisearch/issues/3785>
let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
assert!(located_query_terms.is_empty());
Ok(())
}
}
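The separator handling above is the overflow fix itself: the term position starts at the `u16::MAX` sentinel, so a query that begins with a hard separator has to bump it with a wrapping add. A minimal, self-contained reproduction of the arithmetic, with no Meilisearch types involved:

```rust
// `u16::MAX` is used as a "no position yet" sentinel; bumping it with `+ 1`
// panics in debug builds, while `wrapping_add(1)` rolls over to 0 as intended.
fn bump_position(position: u16) -> u16 {
    position.wrapping_add(1)
}

fn main() {
    let start_sentinel = u16::MAX;
    assert_eq!(bump_position(start_sentinel), 0);
    // `start_sentinel + 1` would instead panic with "attempt to add with overflow"
    // whenever overflow checks are enabled.
}
```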


@ -205,18 +205,12 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn find_all_costs_to_end(&self) -> MappedInterner<QueryNode, Vec<u64>> {
let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]);
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new();
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
while let Some(cur_node) = node_stack.pop_front() {
self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| {
if cur_node == self.query_graph.end_node {
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
return;
}
let mut self_costs = Vec::<u64>::new();
let cur_node_edges = &self.edges_of_node.get(cur_node);
@ -232,13 +226,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
self_costs.dedup();
*costs_to_end.get_mut(cur_node) = self_costs;
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
});
costs_to_end
}
@ -247,17 +235,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) {
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new();
enqueued.insert(node_with_removed_outgoing_conditions);
node_stack.push_back(node_with_removed_outgoing_conditions);
'main_loop: while let Some(cur_node) = node_stack.pop_front() {
// Traverse the graph backward from the target node, recomputing the cost for each of its predecessors.
// We first check that no other node is contributing the same total cost to a predecessor before removing
// the cost from the predecessor.
self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| {
let mut costs_to_remove = FxHashSet::default();
for c in costs.get(cur_node) {
costs_to_remove.insert(*c);
}
costs_to_remove.extend(costs.get(cur_node).iter().copied());
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
@ -265,22 +248,75 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
for cost in costs.get(edge.dest_node).iter() {
costs_to_remove.remove(&(*cost + edge.cost as u64));
if costs_to_remove.is_empty() {
continue 'main_loop;
return;
}
}
}
if costs_to_remove.is_empty() {
continue 'main_loop;
return;
}
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
});
}
/// Traverse the graph backwards from the given node such that every time
/// a node is visited, we are guaranteed that all its successors either:
/// 1. have already been visited; OR
/// 2. were not reachable from the given node
pub fn traverse_breadth_first_backward(
&self,
from: Interned<QueryNode>,
mut visit: impl FnMut(Interned<QueryNode>),
) {
let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
{
// Go backward to collect the set of nodes reachable from the given node;
// the nodes that are not reachable will be treated as already visited.
let mut stack = VecDeque::new();
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
enqueued.insert(from);
stack.push_back(from);
while let Some(n) = stack.pop_front() {
if reachable.contains(n) {
continue;
}
reachable.insert(n);
for prev_node in self.query_graph.nodes.get(n).predecessors.iter() {
if !enqueued.contains(prev_node) && !reachable.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
};
let mut unreachable_or_visited =
SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
for (n, _) in self.query_graph.nodes.iter() {
if !reachable.contains(n) {
unreachable_or_visited.insert(n);
}
}
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
let mut stack = VecDeque::new();
enqueued.insert(from);
stack.push_back(from);
while let Some(cur_node) = stack.pop_front() {
if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) {
stack.push_back(cur_node);
continue;
}
unreachable_or_visited.insert(cur_node);
visit(cur_node);
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) {
node_stack.push_back(prev_node);
if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
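Both cost computations above now delegate to `traverse_breadth_first_backward`, whose contract is that a node is visited only once all of its successors are either already visited or unreachable from the starting node. A standalone sketch of that traversal on a plain index-based DAG, not the crate's interned graph types:

```rust
use std::collections::{HashSet, VecDeque};

/// Visit nodes backward from `from` on a DAG given as predecessor/successor lists.
/// A node is only visited once every successor is either already visited or not
/// reachable from `from` by walking predecessor edges.
fn traverse_backward(preds: &[Vec<usize>], succs: &[Vec<usize>], from: usize, mut visit: impl FnMut(usize)) {
    // 1. Collect the nodes reachable from `from` through predecessor edges.
    let mut reachable = HashSet::new();
    let mut stack = VecDeque::from([from]);
    while let Some(n) = stack.pop_front() {
        if reachable.insert(n) {
            stack.extend(preds[n].iter().copied());
        }
    }
    // 2. Unreachable nodes count as already settled.
    let mut settled: HashSet<usize> = (0..preds.len()).filter(|n| !reachable.contains(n)).collect();
    // 3. Breadth-first backward walk, re-queueing nodes whose successors are not all settled yet.
    let mut queue = VecDeque::from([from]);
    let mut enqueued = HashSet::from([from]);
    while let Some(n) = queue.pop_front() {
        if !succs[n].iter().all(|s| settled.contains(s)) {
            queue.push_back(n); // not ready yet, try again later
            continue;
        }
        settled.insert(n);
        visit(n);
        for &p in &preds[n] {
            if reachable.contains(&p) && enqueued.insert(p) {
                queue.push_back(p);
            }
        }
    }
}

fn main() {
    // A diamond: 0 = start, 3 = end, with 0 -> 1 -> 3 and 0 -> 2 -> 3; traverse backward from 3.
    let preds = vec![vec![], vec![0], vec![0], vec![1, 2]];
    let succs = vec![vec![1, 2], vec![3], vec![3], vec![]];
    let mut order = vec![];
    traverse_backward(&preds, &succs, 3, |n| order.push(n));
    assert_eq!(order, vec![3, 1, 2, 0]); // `start` is visited only after both of its successors
}
```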


@ -20,6 +20,8 @@ mod position;
mod proximity;
/// Implementation of the `typo` ranking rule
mod typo;
/// Implementation of the `words` ranking rule
mod words;
use std::collections::BTreeSet;
use std::hash::Hash;
@ -33,6 +35,7 @@ pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap;
pub use typo::{TypoCondition, TypoGraph};
pub use words::{WordsCondition, WordsGraph};
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
use super::query_term::LocatedQueryTermSubset;


@ -111,23 +111,16 @@ impl RankingRuleGraphTrait for PositionGraph {
fn cost_from_position(sum_positions: u32) -> u32 {
match sum_positions {
0 | 1 | 2 | 3 => sum_positions,
4 | 5 => 4,
6 | 7 => 5,
8 | 9 => 6,
10 | 11 => 7,
12 | 13 => 8,
14 | 15 => 9,
16 | 17..=24 => 10,
25..=32 => 11,
33..=64 => 12,
65..=128 => 13,
129..=256 => 14,
257..=512 => 15,
513..=1024 => 16,
1025..=2048 => 17,
2049..=4096 => 18,
4097..=8192 => 19,
_ => 20,
0 => 0,
1 => 1,
2..=4 => 2,
5..=7 => 3,
8..=11 => 4,
12..=16 => 5,
17..=24 => 6,
25..=64 => 7,
65..=256 => 8,
257..=1024 => 9,
_ => 10,
}
}


@ -50,7 +50,7 @@ impl RankingRuleGraphTrait for TypoGraph {
// 3-gram -> equivalent to 2 typos
let base_cost = if term.term_ids.len() == 1 { 0 } else { term.term_ids.len() as u32 };
for nbr_typos in 0..=term.term_subset.max_nbr_typos(ctx) {
for nbr_typos in 0..=term.term_subset.max_typo_cost(ctx) {
let mut term = term.clone();
match nbr_typos {
0 => {


@ -0,0 +1,49 @@
use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext;
use crate::Result;
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct WordsCondition {
term: LocatedQueryTermSubset,
}
pub enum WordsGraph {}
impl RankingRuleGraphTrait for WordsGraph {
type Condition = WordsCondition;
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
universe: &RoaringBitmap,
) -> Result<ComputedCondition> {
let WordsCondition { term, .. } = condition;
// maybe compute_query_term_subset_docids should accept a universe as argument
let mut docids = compute_query_term_subset_docids(ctx, &term.term_subset)?;
docids &= universe;
Ok(ComputedCondition {
docids,
universe_len: universe.len(),
start_term_subset: None,
end_term_subset: term.clone(),
})
}
fn build_edges(
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
_from: Option<&LocatedQueryTermSubset>,
to_term: &LocatedQueryTermSubset,
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
Ok(vec![(
to_term.term_ids.len() as u32,
conditions_interner.insert(WordsCondition { term: to_term.clone() }),
)])
}
}


@ -138,7 +138,7 @@ fn test_attribute_position_simple() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
}
#[test]
fn test_attribute_position_repeated() {
@ -163,7 +163,7 @@ fn test_attribute_position_different_fields() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
}
#[test]
@ -176,5 +176,5 @@ fn test_attribute_position_ngrams() {
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("quick brown");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
}


@ -4,7 +4,7 @@ pub mod distinct;
pub mod exactness;
pub mod geo_sort;
pub mod integration;
#[cfg(feature = "default")]
#[cfg(feature = "all-tokenizations")]
pub mod language;
pub mod ngram_split_words;
pub mod proximity;


@ -3,9 +3,9 @@ This module tests the following properties:
1. Two consecutive words from a query can be combined into a "2gram"
2. Three consecutive words from a query can be combined into a "3gram"
3. A word from the query can be split into two consecutive words (split words), no matter how short it is
4. A 2gram can be split into two words
5. A 3gram can be split into two words
6. 2grams can contain up to 1 typo
7. 3grams cannot have typos
8. 2grams and 3grams can be prefix tolerant
@ -14,6 +14,7 @@ This module tests the following properties:
11. Disabling typo tolerance does not disable ngram tolerance
12. Prefix tolerance is disabled for the last word if a space follows it
13. Ngrams cannot be formed by combining a phrase and a word or two phrases
14. Split words are not disabled by the `disableOnAttribute` or `disableOnWords` typo settings
*/
use crate::index::tests::TempIndex;
@ -56,6 +57,10 @@ fn create_index() -> TempIndex {
{
"id": 5,
"text": "sunflowering is not a verb"
},
{
"id": 6,
"text": "xy z"
}
]))
.unwrap();
@ -263,10 +268,11 @@ fn test_disable_split_words() {
s.query("sunflower ");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
// no document containing `sun flower`
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the sun flower is tall\"",
"\"the sunflower is tall\"",
]
"###);
@ -307,10 +313,11 @@ fn test_3gram_no_split_words() {
let SearchResult { documents_ids, .. } = s.execute().unwrap();
// no document with `sun flower`
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 5]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 5]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the sun flower is tall\"",
"\"the sunflowers are pretty\"",
"\"the sunflower is tall\"",
"\"sunflowering is not a verb\"",
@ -369,3 +376,50 @@ fn test_no_ngram_phrases() {
]
"###);
}
#[test]
fn test_short_split_words() {
let index = create_index();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("xyz");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"xy z\"",
]
"###);
}
#[test]
fn test_split_words_never_disabled() {
let index = create_index();
index
.update_settings(|s| {
s.set_exact_words(["sunflower"].iter().map(ToString::to_string).collect());
s.set_exact_attributes(["text"].iter().map(ToString::to_string).collect());
})
.unwrap();
let txn = index.read_txn().unwrap();
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the sunflower is tall");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 3]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the sun flower is tall\"",
"\"the sunflower is tall\"",
]
"###);
}


@ -9,7 +9,7 @@ This module tests the following properties:
6. A typo on the first letter of a word counts as two typos
7. Phrases are not typo tolerant
8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos`
9. 3grams are not typo tolerant (but they can be split into two words)
10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly
if `words` doesn't exist before it.
11. The `typo` ranking rule places documents with the same number of typos in the same bucket
@ -287,16 +287,17 @@ fn test_typo_exact_word() {
]
"###);
// exact words do not disable prefix (sunflowering OK, but no sunflowar)
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("network interconnection sunflower");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 17, 18]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"network interconnection sunflower\"",
"\"network interconnection sun flower\"",
"\"network interconnection sunflowering\"",
]
"###);


@ -1,87 +0,0 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::resolve_query_graph::compute_query_graph_docids;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy};
pub struct Words {
exhausted: bool, // TODO: remove
query_graph: Option<QueryGraph>,
nodes_to_remove: Vec<SmallBitmap<QueryNode>>,
terms_matching_strategy: TermsMatchingStrategy,
}
impl Words {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self {
exhausted: true,
query_graph: None,
nodes_to_remove: vec![],
terms_matching_strategy,
}
}
}
impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
fn id(&self) -> String {
"words".to_owned()
}
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
_universe: &RoaringBitmap,
parent_query_graph: &QueryGraph,
) -> Result<()> {
self.exhausted = false;
self.query_graph = Some(parent_query_graph.clone());
self.nodes_to_remove = match self.terms_matching_strategy {
TermsMatchingStrategy::Last => {
let mut ns = parent_query_graph.removal_order_for_terms_matching_strategy_last(ctx);
ns.reverse();
ns
}
TermsMatchingStrategy::All => {
vec![]
}
};
Ok(())
}
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
if self.exhausted {
return Ok(None);
}
let Some(query_graph) = &mut self.query_graph else { panic!() };
logger.log_internal_state(query_graph);
let this_bucket = compute_query_graph_docids(ctx, query_graph, universe)?;
let child_query_graph = query_graph.clone();
if self.nodes_to_remove.is_empty() {
self.exhausted = true;
} else {
let nodes_to_remove = self.nodes_to_remove.pop().unwrap();
query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::<Vec<_>>());
}
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
}
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
) {
self.exhausted = true;
self.nodes_to_remove = vec![];
self.query_graph = None;
}
}


@ -261,7 +261,9 @@ pub(crate) mod test_helpers {
let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap();
let env = options.open(tempdir.path()).unwrap();
let content = env.create_database(None).unwrap();
let mut wtxn = env.write_txn().unwrap();
let content = env.create_database(&mut wtxn, None).unwrap();
wtxn.commit().unwrap();
FacetIndex {
content,


@ -1581,7 +1581,7 @@ mod tests {
assert_eq!(count, 4);
}
#[cfg(feature = "default")]
#[cfg(feature = "chinese")]
#[test]
fn test_meilisearch_1714() {
let index = TempIndex::new();