Fix the legend

Create a small tool to measure the size of internal databases
Merge #3842
2025-07-18 20:30:47 +00:00 · 2023-06-24 14:53:32 +02:00 · 2023-06-23 22:57:57 +02:00 · 2023-06-22 18:01:10 +00:00 · 2023-06-22 21:59:00 +08:00 · 2023-06-20 13:35:33 +00:00
133 changed files with 5574 additions and 2344 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -2,4 +2,3 @@ target
 Dockerfile
 .dockerignore
 .gitignore
-**/.git
--- a/.github/scripts/check-release.sh
+++ b/.github/scripts/check-release.sh
@ -1,24 +1,41 @@
-#!/bin/bash
+#!/usr/bin/env bash
+set -eu -o pipefail

-# check_tag $current_tag $file_tag $file_name
-function check_tag {
-  if [[ "$1" != "$2" ]]; then
-      echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1"
-      ret=1
-  fi
+check_tag() {
+    local expected=$1
+    local actual=$2
+    local filename=$3
+
+    if [[ $actual != $expected ]]; then
+        echo >&2 "Error: the current tag does not match the version in $filename: found $actual, expected $expected"
+        return 1
+    fi
 }

+read_version() {
+    grep '^version = ' | cut -d \" -f 2
+}
+
+if [[ -z "${GITHUB_REF:-}" ]]; then
+    echo >&2 "Error: GITHUB_REF is not set"
+    exit 1
+fi
+
+if [[ ! "$GITHUB_REF" =~ ^refs/tags/v[0-9]+\.[0-9]+\.[0-9]+(-[a-z0-9]+)?$ ]]; then
+    echo >&2 "Error: GITHUB_REF is not a valid tag: $GITHUB_REF"
+    exit 1
+fi
+
+current_tag=${GITHUB_REF#refs/tags/v}
 ret=0
-current_tag=${GITHUB_REF#'refs/tags/v'}

-file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
-check_tag $current_tag $file_tag
+toml_tag="$(cat Cargo.toml | read_version)"
+check_tag "$current_tag" "$toml_tag" Cargo.toml || ret=1

-lock_file='Cargo.lock'
-lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')
-check_tag $current_tag $lock_tag $lock_file
+lock_tag=$(grep -A 1 '^name = "meilisearch-auth"' Cargo.lock | read_version)
+check_tag "$current_tag" "$lock_tag" Cargo.lock || ret=1

-if [[ "$ret" -eq 0 ]] ; then
-  echo 'OK'
+if (( ret == 0 )); then
+    echo 'OK'
 fi
 exit $ret
--- a/.github/workflows/fuzzer-indexing.yml
+++ b/.github/workflows/fuzzer-indexing.yml
@ -0,0 +1,24 @@
+name: Run the indexing fuzzer
+
+on:
+  push:
+    branches:
+      - main
+
+jobs:
+  fuzz:
+    name: Setup the action
+    runs-on: ubuntu-latest
+    timeout-minutes: 4320 # 72h
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      # Run the indexing fuzzer
+      - name: Run the fuzzer
+        run: |
+          cargo run --release --bin fuzz-indexing
--- a/.github/workflows/publish-apt-brew-pkg.yml
+++ b/.github/workflows/publish-apt-brew-pkg.yml
@ -35,7 +35,7 @@ jobs:
    - name: Build deb package
      run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
    - name: Upload debian pkg to release
-      uses: svenstaro/upload-release-action@2.5.0
+      uses: svenstaro/upload-release-action@2.6.1
      with:
        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/debian/meilisearch.deb
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@ -54,7 +54,7 @@ jobs:
    # No need to upload binaries for dry run (cron)
    - name: Upload binaries to release
      if: github.event_name == 'release'
-      uses: svenstaro/upload-release-action@2.5.0
+      uses: svenstaro/upload-release-action@2.6.1
      with:
        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/release/meilisearch
@ -87,7 +87,7 @@ jobs:
    # No need to upload binaries for dry run (cron)
    - name: Upload binaries to release
      if: github.event_name == 'release'
-      uses: svenstaro/upload-release-action@2.5.0
+      uses: svenstaro/upload-release-action@2.6.1
      with:
        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/release/${{ matrix.artifact_name }}
@ -121,7 +121,7 @@ jobs:
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.5.0
+        uses: svenstaro/upload-release-action@2.6.1
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
@ -183,7 +183,7 @@ jobs:
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.5.0
+        uses: svenstaro/upload-release-action@2.6.1
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@ -58,13 +58,9 @@ jobs:

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v2
-        with:
-          platforms: linux/amd64,linux/arm64

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v2
-        with:
-          platforms: linux/amd64,linux/arm64

      - name: Login to Docker Hub
        uses: docker/login-action@v2
@ -92,13 +88,10 @@ jobs:
          push: true
          platforms: linux/amd64,linux/arm64
          tags: ${{ steps.meta.outputs.tags }}
-          builder: ${{ steps.buildx.outputs.name }}
          build-args: |
            COMMIT_SHA=${{ github.sha }}
            COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
            GIT_TAG=${{ github.ref_name }}
-          cache-from: type=gha
-          cache-to: type=gha,mode=max

      # /!\ Don't touch this without checking with Cloud team
      - name: Send CI information to Cloud team
--- a/.github/workflows/sdks-tests.yml
+++ b/.github/workflows/sdks-tests.yml
@ -3,6 +3,11 @@ name: SDKs tests

 on:
  workflow_dispatch:
+    inputs:
+      docker_image:
+        description: 'The Meilisearch Docker image used'
+        required: false
+        default: nightly
  schedule:
    - cron: "0 6 * * MON" # Every Monday at 6:00AM

@ -11,13 +16,28 @@ env:
  MEILI_NO_ANALYTICS: 'true'

 jobs:
+  define-docker-image:
+    runs-on: ubuntu-latest
+    outputs:
+      docker-image: ${{ steps.define-image.outputs.docker-image }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Define the Docker image we need to use
+        id: define-image
+        run: |
+          event=${{ github.event_name }}
+          echo "docker-image=nightly" >> $GITHUB_OUTPUT
+          if [[ $event == 'workflow_dispatch' ]]; then
+            echo "docker-image=${{ github.event.inputs.docker_image }}" >> $GITHUB_OUTPUT
+          fi

  meilisearch-js-tests:
+    needs: define-docker-image
    name: JS SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@ -47,11 +67,12 @@ jobs:
        run: yarn test:env:browser

  instant-meilisearch-tests:
+    needs: define-docker-image
    name: instant-meilisearch tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@ -73,11 +94,12 @@ jobs:
        run: yarn build

  meilisearch-php-tests:
+    needs: define-docker-image
    name: PHP SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@ -103,11 +125,12 @@ jobs:
          composer remove --dev guzzlehttp/guzzle http-interop/http-factory-guzzle

  meilisearch-python-tests:
+    needs: define-docker-image
    name: Python SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@ -127,11 +150,12 @@ jobs:
        run: pipenv run pytest

  meilisearch-go-tests:
+    needs: define-docker-image
    name: Go SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@ -139,7 +163,7 @@ jobs:
          - '7700:7700'
    steps:
      - name: Set up Go
-        uses: actions/setup-go@v3
+        uses: actions/setup-go@v4
        with:
          go-version: stable
      - uses: actions/checkout@v3
@ -156,11 +180,12 @@ jobs:
        run: go test -v ./...

  meilisearch-ruby-tests:
+    needs: define-docker-image
    name: Ruby SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
@ -180,11 +205,12 @@ jobs:
        run: bundle exec rspec

  meilisearch-rust-tests:
+    needs: define-docker-image
    name: Rust SDK tests
    runs-on: ubuntu-latest
    services:
      meilisearch:
-        image: getmeili/meilisearch:nightly
+        image: getmeili/meilisearch:${{ needs.define-docker-image.outputs.docker-image }}
        env:
          MEILI_MASTER_KEY: ${{ env.MEILI_MASTER_KEY }}
          MEILI_NO_ANALYTICS: ${{ env.MEILI_NO_ANALYTICS }}
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@ -43,7 +43,7 @@ jobs:
          toolchain: nightly
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.1
+        uses: Swatinem/rust-cache@v2.4.0
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@ -65,7 +65,7 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.1
+        uses: Swatinem/rust-cache@v2.4.0
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@ -105,6 +105,29 @@ jobs:
          command: test
          args: --workspace --locked --release --all-features

+  test-disabled-tokenization:
+    name: Test disabled tokenization
+    runs-on: ubuntu-latest
+    container:
+      image: ubuntu:18.04
+    if: github.event_name == 'schedule'
+    steps:
+      - uses: actions/checkout@v3
+      - name: Install needed dependencies
+        run: |
+          apt-get update
+          apt-get install --assume-yes build-essential curl
+      - uses: actions-rs/toolchain@v1 
+        with: 
+          toolchain: stable 
+          override: true 
+      - name: Run cargo tree without default features and check lindera is not present
+        run: |
+          cargo tree -f '{p} {f}' -e normal --no-default-features | grep lindera -vqz
+      - name: Run cargo tree with default features and check lindera is present
+        run: |
+          cargo tree -f '{p} {f}' -e normal | grep lindera -qz
+                
  # We run tests in debug also, to make sure that the debug_assertions are hit
  test-debug:
    name: Run tests in debug
@ -123,7 +146,7 @@ jobs:
          toolchain: stable
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.1
+        uses: Swatinem/rust-cache@v2.4.0
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@ -142,7 +165,7 @@ jobs:
          override: true
          components: clippy
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.1
+        uses: Swatinem/rust-cache@v2.4.0
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
@ -161,7 +184,7 @@ jobs:
          override: true
          components: rustfmt
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.1
+        uses: Swatinem/rust-cache@v2.4.0
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@ -19,8 +19,8 @@ If Meilisearch does not offer optimized support for your language, please consid
 ## Assumptions

 1. **You're familiar with [GitHub](https://github.com) and the [Pull Requests (PR)](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) workflow.**
-2. **You've read the Meilisearch [documentation](https://docs.meilisearch.com).**
-3. **You know about the [Meilisearch community](https://docs.meilisearch.com/learn/what_is_meilisearch/contact.html).
+2. **You've read the Meilisearch [documentation](https://www.meilisearch.com/docs).**
+3. **You know about the [Meilisearch community on Discord](https://discord.meilisearch.com).
   Please use this for help.**

 ## How to Contribute
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -10,14 +10,16 @@ members = [
    "file-store",
    "permissive-json-pointer",
    "milli",
+    "index-stats",
    "filter-parser",
    "flatten-serde-json",
    "json-depth-checker",
-    "benchmarks"
+    "benchmarks",
+    "fuzzers",
 ]

 [workspace.package]
-version = "1.1.1"
+version = "1.2.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
--- a/Dockerfile
+++ b/Dockerfile
@ -1,4 +1,3 @@
-# syntax=docker/dockerfile:1.4
 # Compile
 FROM    rust:alpine3.16 AS compiler

@ -12,7 +11,7 @@ ARG     GIT_TAG
 ENV     VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
 ENV     RUSTFLAGS="-C target-feature=-crt-static"

-COPY    --link . .
+COPY    . .
 RUN     set -eux; \
        apkArch="$(apk --print-arch)"; \
        if [ "$apkArch" = "aarch64" ]; then \
@ -31,7 +30,7 @@ RUN     apk update --quiet \

 # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
 # to find.
-COPY    --from=compiler --link /meilisearch/target/release/meilisearch /bin/meilisearch
+COPY    --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN     ln -s /bin/meilisearch /meilisearch
--- a/README.md
+++ b/README.md
@ -7,8 +7,8 @@
  <a href="https://www.meilisearch.com">Website</a> |
  <a href="https://roadmap.meilisearch.com/tabs/1-under-consideration">Roadmap</a> |
  <a href="https://blog.meilisearch.com">Blog</a> |
-  <a href="https://meilisearch.com/docs">Documentation</a> |
-  <a href="https://meilisearch.com/docs/faq">FAQ</a> |
+  <a href="https://www.meilisearch.com/docs">Documentation</a> |
+  <a href="https://www.meilisearch.com/docs/faq">FAQ</a> |
  <a href="https://discord.meilisearch.com">Discord</a>
 </h4>

@ -36,27 +36,27 @@ Meilisearch helps you shape a delightful search experience in a snap, offering f
 ## ✨ Features

 - **Search-as-you-type:** find search results in less than 50 milliseconds
- **[Typo tolerance](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
- **[Filtering](https://meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
- **[Sorting](https://meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
- **[Synonym support](https://meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
- **[Geosearch](https://meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
- **[Extensive language support](https://meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
- **[Security management](https://meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
- **[Multi-Tenancy](https://meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
+- **[Typo tolerance](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#typo-tolerance):** get relevant matches even when queries contain typos and misspellings
+- **[Filtering](https://www.meilisearch.com/docs/learn/advanced/filtering) and [faceted search](https://www.meilisearch.com/docs/learn/advanced/faceted_search):** enhance your user's search experience with custom filters and build a faceted search interface in a few lines of code
+- **[Sorting](https://www.meilisearch.com/docs/learn/advanced/sorting):** sort results based on price, date, or pretty much anything else your users need
+- **[Synonym support](https://www.meilisearch.com/docs/learn/getting_started/customizing_relevancy#synonyms):** configure synonyms to include more relevant content in your search results
+- **[Geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch):** filter and sort documents based on geographic data
+- **[Extensive language support](https://www.meilisearch.com/docs/learn/what_is_meilisearch/language):** search datasets in any language, with optimized support for Chinese, Japanese, Hebrew, and languages using the Latin alphabet
+- **[Security management](https://www.meilisearch.com/docs/learn/security/master_api_keys):** control which users can access what data with API keys that allow fine-grained permissions handling
+- **[Multi-Tenancy](https://www.meilisearch.com/docs/learn/security/tenant_tokens):** personalize search results for any number of application tenants
 - **Highly Customizable:** customize Meilisearch to your specific needs or use our out-of-the-box and hassle-free presets
- **[RESTful API](https://meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
+- **[RESTful API](https://www.meilisearch.com/docs/reference/api/overview):** integrate Meilisearch in your technical stack with our plugins and SDKs
 - **Easy to install, deploy, and maintain**

 ## 📖 Documentation

-You can consult Meilisearch's documentation at [https://meilisearch.com/docs](https://meilisearch.com/docs/).
+You can consult Meilisearch's documentation at [https://www.meilisearch.com/docs](https://www.meilisearch.com/docs/).

 ## 🚀 Getting started

-For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://meilisearch.com/docs/learn/getting_started/quick_start) guide.
+For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start) guide.

-You may also want to check out [Meilisearch 101](https://meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.
+You may also want to check out [Meilisearch 101](https://www.meilisearch.com/docs/learn/getting_started/filtering_and_sorting) for an introduction to some of Meilisearch's most popular features.

 ## ☁️ Meilisearch cloud

@ -66,7 +66,7 @@ Let us manage your infrastructure so you can focus on integrating a great search

 Install one of our SDKs in your project for seamless integration between Meilisearch and your favorite language or framework!

-Take a look at the complete [Meilisearch integration list](https://meilisearch.com/docs/learn/what_is_meilisearch/sdks).
+Take a look at the complete [Meilisearch integration list](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks).

 [![Logos belonging to different languages and frameworks supported by Meilisearch, including React, Ruby on Rails, Go, Rust, and PHP](assets/integrations.png)](https://www.meilisearch.com/docs/learn/what_is_meilisearch/sdks)

@ -74,17 +74,17 @@ Take a look at the complete [Meilisearch integration list](https://meilisearch.c

 Experienced users will want to keep our [API Reference](https://www.meilisearch.com/docs/reference/api/overview) close at hand.

-We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://meilisearch.com/docs/learn/advanced/filtering), [sorting](https://meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://meilisearch.com/docs/learn/security/tenant_tokens).
+We also offer a wide range of dedicated guides to all Meilisearch features, such as [filtering](https://www.meilisearch.com/docs/learn/advanced/filtering), [sorting](https://www.meilisearch.com/docs/learn/advanced/sorting), [geosearch](https://www.meilisearch.com/docs/learn/advanced/geosearch), [API keys](https://www.meilisearch.com/docs/learn/security/master_api_keys), and [tenant tokens](https://www.meilisearch.com/docs/learn/security/tenant_tokens).

-Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://meilisearch.com/docs/learn/core_concepts/indexes).
+Finally, for more in-depth information, refer to our articles explaining fundamental Meilisearch concepts such as [documents](https://www.meilisearch.com/docs/learn/core_concepts/documents) and [indexes](https://www.meilisearch.com/docs/learn/core_concepts/indexes).

 ## 📊 Telemetry

-Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.
+Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry#how-to-disable-data-collection) whenever you want.

 To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Don't forget to include your `Instance UID` in the message, as this helps us quickly find and delete your data.

-If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.
+If you want to know more about the kind of data we collect and what we use it for, check the [telemetry section](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry) of our documentation.

 ## 📫 Get in touch!

--- a/assets/grafana-dashboard.json
+++ b/assets/grafana-dashboard.json
--- a/assets/prometheus-basic-scraper.yml
+++ b/assets/prometheus-basic-scraper.yml
@ -0,0 +1,19 @@
+global:
+  scrape_interval:     15s # By default, scrape targets every 15 seconds.
+
+  # Attach these labels to any time series or alerts when communicating with
+  # external systems (federation, remote storage, Alertmanager).
+  external_labels:
+    monitor: 'codelab-monitor'
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's the local Meilisearch instance.
+scrape_configs:
+  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+  - job_name: 'meilisearch'
+
+    # Override the global default and scrape targets from this job every 5 seconds.
+    scrape_interval: 5s
+
+    static_configs:
+      - targets: ['localhost:7700']
--- a/benchmarks/Cargo.toml
+++ b/benchmarks/Cargo.toml
@ -13,7 +13,7 @@ license.workspace = true
 [dependencies]
 anyhow = "1.0.70"
 csv = "1.2.1"
-milli = { path = "../milli", default-features = false }
+milli = { path = "../milli" }
 mimalloc = { version = "0.1.36", default-features = false }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }

@ -31,7 +31,7 @@ flate2 = "1.0.25"
 reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }

 [features]
-default = ["milli/default"]
+default = ["milli/all-tokenizations"]

 [[bench]]
 name = "search_songs"
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@ -119,9 +119,9 @@ _[Download the `smol-wiki` dataset](https://milli-benchmarks.fra1.digitaloceansp

 ### Movies

-`movies` is a really small dataset we uses as our example in the [getting started](https://docs.meilisearch.com/learn/getting_started/)
+`movies` is a really small dataset we use as our example in the [getting started](https://www.meilisearch.com/docs/learn/getting_started/quick_start)

-_[Download the `movies` dataset](https://docs.meilisearch.com/movies.json)._
+_[Download the `movies` dataset](https://www.meilisearch.com/movies.json)._


 ### All Countries
--- a/config.toml
+++ b/config.toml
@ -1,130 +1,131 @@
 # This file shows the default configuration of Meilisearch.
-# All variables are defined here: https://docs.meilisearch.com/learn/configuration/instance_options.html#environment-variables
+# All variables are defined here: https://www.meilisearch.com/docs/learn/configuration/instance_options#environment-variables

-db_path = "./data.ms"
 # Designates the location where database files will be created and retrieved.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#database-path
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#database-path
+db_path = "./data.ms"

-env = "development"
 # Configures the instance's environment. Value must be either `production` or `development`.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#environment
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#environment
+env = "development"

-http_addr = "localhost:7700"
 # The address on which the HTTP server will listen.
+http_addr = "localhost:7700"

-# master_key = "YOUR_MASTER_KEY_VALUE"
 # Sets the instance's master key, automatically protecting all routes except GET /health.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#master-key
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#master-key
+# master_key = "YOUR_MASTER_KEY_VALUE"

-# no_analytics = true
 # Deactivates Meilisearch's built-in telemetry when provided.
 # Meilisearch automatically collects data from all instances that do not opt out using this flag.
 # All gathered data is used solely for the purpose of improving Meilisearch, and can be deleted at any time.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#disable-analytics
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#disable-analytics
+# no_analytics = true

-http_payload_size_limit = "100 MB"
 # Sets the maximum size of accepted payloads.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#payload-limit-size
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#payload-limit-size
+http_payload_size_limit = "100 MB"

-log_level = "INFO"
 # Defines how much detail should be present in Meilisearch's logs.
 # Meilisearch currently supports six log levels, listed in order of increasing verbosity:  `OFF`, `ERROR`, `WARN`, `INFO`, `DEBUG`, `TRACE`
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#log-level
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#log-level
+log_level = "INFO"

-# max_indexing_memory = "2 GiB"
 # Sets the maximum amount of RAM Meilisearch can use when indexing.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-memory
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-memory
+# max_indexing_memory = "2 GiB"

-# max_indexing_threads = 4
 # Sets the maximum number of threads Meilisearch can use during indexing.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#max-indexing-threads
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#max-indexing-threads
+# max_indexing_threads = 4

 #############
 ### DUMPS ###
 #############

-dump_dir = "dumps/"
 # Sets the directory where Meilisearch will create dump files.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#dump-directory
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#dump-directory
+dump_dir = "dumps/"

-# import_dump = "./path/to/my/file.dump"
 # Imports the dump file located at the specified path. Path must point to a .dump file.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-dump
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-dump
+# import_dump = "./path/to/my/file.dump"

-ignore_missing_dump = false
 # Prevents Meilisearch from throwing an error when `import_dump` does not point to a valid dump file.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-dump
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-dump
+ignore_missing_dump = false

-ignore_dump_if_db_exists = false
 # Prevents a Meilisearch instance with an existing database from throwing an error when using `import_dump`.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-dump-if-db-exists
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-dump-if-db-exists
+ignore_dump_if_db_exists = false


 #################
 ### SNAPSHOTS ###
 #################

-schedule_snapshot = false
 # Enables scheduled snapshots when true, disable when false (the default).
 # If the value is given as an integer, then enables the scheduled snapshot with the passed value as the interval
 # between each snapshot, in seconds.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#schedule-snapshot-creation
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#schedule-snapshot-creation
+schedule_snapshot = false

-snapshot_dir = "snapshots/"
 # Sets the directory where Meilisearch will store snapshots.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#snapshot-destination
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#snapshot-destination
+snapshot_dir = "snapshots/"

-# import_snapshot = "./path/to/my/snapshot"
 # Launches Meilisearch after importing a previously-generated snapshot at the given filepath.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#import-snapshot
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#import-snapshot
+# import_snapshot = "./path/to/my/snapshot"

-ignore_missing_snapshot = false
 # Prevents a Meilisearch instance from throwing an error when `import_snapshot` does not point to a valid snapshot file.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-missing-snapshot
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-missing-snapshot
+ignore_missing_snapshot = false

-ignore_snapshot_if_db_exists = false
 # Prevents a Meilisearch instance with an existing database from throwing an error when using `import_snapshot`.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ignore-snapshot-if-db-exists
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ignore-snapshot-if-db-exists
+ignore_snapshot_if_db_exists = false


 ###########
 ### SSL ###
 ###########

-# ssl_auth_path = "./path/to/root"
 # Enables client authentication in the specified path.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-authentication-path
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-authentication-path
+# ssl_auth_path = "./path/to/root"

-# ssl_cert_path = "./path/to/certfile"
 # Sets the server's SSL certificates.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-certificates-path
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-certificates-path
+# ssl_cert_path = "./path/to/certfile"

-# ssl_key_path = "./path/to/private-key"
 # Sets the server's SSL key files.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-key-path
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-key-path
+# ssl_key_path = "./path/to/private-key"

-# ssl_ocsp_path = "./path/to/ocsp-file"
 # Sets the server's OCSP file.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-ocsp-path
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-ocsp-path
+# ssl_ocsp_path = "./path/to/ocsp-file"

-ssl_require_auth = false
 # Makes SSL authentication mandatory.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-require-auth
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-require-auth
+ssl_require_auth = false

-ssl_resumption = false
 # Activates SSL session resumption.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-resumption
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-resumption
+ssl_resumption = false

-ssl_tickets = false
 # Activates SSL tickets.
-# https://docs.meilisearch.com/learn/configuration/instance_options.html#ssl-tickets
+# https://www.meilisearch.com/docs/learn/configuration/instance_options#ssl-tickets
+ssl_tickets = false

 #############################
 ### Experimental features ###
 #############################

-experimental_enable_metrics = false
 # Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
 # Enables the Prometheus metrics on the `GET /metrics` endpoint.
+experimental_enable_metrics = false

-
+# Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
+experimental_reduce_indexing_memory_usage = false
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@ -101,6 +101,9 @@ pub enum KindDump {
        documents_ids: Vec<String>,
    },
    DocumentClear,
+    DocumentDeletionByFilter {
+        filter: serde_json::Value,
+    },
    Settings {
        settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
        is_deletion: bool,
@ -166,6 +169,9 @@ impl From<KindWithContent> for KindDump {
            KindWithContent::DocumentDeletion { documents_ids, .. } => {
                KindDump::DocumentDeletion { documents_ids }
            }
+            KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
+                KindDump::DocumentDeletionByFilter { filter: filter_expr }
+            }
            KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
            KindWithContent::SettingsUpdate {
                new_settings,
--- a/fuzzers/Cargo.toml
+++ b/fuzzers/Cargo.toml
@ -0,0 +1,20 @@
+[package]
+name = "fuzzers"
+publish = false
+
+version.workspace = true
+authors.workspace = true
+description.workspace = true
+homepage.workspace = true
+readme.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+arbitrary = { version = "1.3.0", features = ["derive"] }
+clap = { version = "4.3.0", features = ["derive"] }
+fastrand = "1.9.0"
+milli = { path = "../milli" }
+serde = { version = "1.0.160", features = ["derive"] }
+serde_json = { version = "1.0.95", features = ["preserve_order"] }
+tempfile = "3.5.0"
--- a/fuzzers/README.md
+++ b/fuzzers/README.md
@ -0,0 +1,3 @@
+# Fuzzers
+
+The purpose of this crate is to contain all the handmade "fuzzers" we may need.
--- a/fuzzers/src/bin/fuzz-indexing.rs
+++ b/fuzzers/src/bin/fuzz-indexing.rs
@ -0,0 +1,152 @@
+use std::num::NonZeroUsize;
+use std::path::PathBuf;
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
+use std::time::Duration;
+
+use arbitrary::{Arbitrary, Unstructured};
+use clap::Parser;
+use fuzzers::Operation;
+use milli::heed::EnvOpenOptions;
+use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
+use milli::Index;
+use tempfile::TempDir;
+
+#[derive(Debug, Arbitrary)]
+struct Batch([Operation; 5]);
+
+#[derive(Debug, Clone, Parser)]
+struct Opt {
+    /// The number of fuzzers to run in parallel.
+    #[clap(long)]
+    par: Option<NonZeroUsize>,
+    // We need to put a lot of newlines in the following documentation or else everything gets collapsed on one line
+    /// The path in which the databases will be created.
+    /// Using a ramdisk is recommended.
+    ///
+    /// Linux:
+    ///
+    /// sudo mount -t tmpfs -o size=2g tmpfs ramdisk # to create it
+    ///
+    /// sudo umount ramdisk # to remove it
+    ///
+    /// MacOS:
+    ///
+    /// diskutil erasevolume HFS+ 'RAM Disk' `hdiutil attach -nobrowse -nomount ram://4194304` # create it
+    ///
+    /// hdiutil detach /dev/:the_disk
+    #[clap(long)]
+    path: Option<PathBuf>,
+}
+
+fn main() {
+    let opt = Opt::parse();
+    let progression: &'static AtomicUsize = Box::leak(Box::new(AtomicUsize::new(0)));
+    let stop: &'static AtomicBool = Box::leak(Box::new(AtomicBool::new(false)));
+
+    let par = opt.par.unwrap_or_else(|| std::thread::available_parallelism().unwrap()).get();
+    let mut handles = Vec::with_capacity(par);
+
+    for _ in 0..par {
+        let opt = opt.clone();
+
+        let handle = std::thread::spawn(move || {
+            let mut options = EnvOpenOptions::new();
+            options.map_size(1024 * 1024 * 1024 * 1024);
+            let tempdir = match opt.path {
+                Some(path) => TempDir::new_in(path).unwrap(),
+                None => TempDir::new().unwrap(),
+            };
+            let index = Index::new(options, tempdir.path()).unwrap();
+            let indexer_config = IndexerConfig::default();
+            let index_documents_config = IndexDocumentsConfig::default();
+
+            std::thread::scope(|s| {
+                loop {
+                    if stop.load(Ordering::Relaxed) {
+                        return;
+                    }
+                    let v: Vec<u8> =
+                        std::iter::repeat_with(|| fastrand::u8(..)).take(1000).collect();
+
+                    let mut data = Unstructured::new(&v);
+                    let batches = <[Batch; 5]>::arbitrary(&mut data).unwrap();
+                    // will be used to display the error once a thread crashes
+                    let dbg_input = format!("{:#?}", batches);
+
+                    let handle = s.spawn(|| {
+                        let mut wtxn = index.write_txn().unwrap();
+
+                        for batch in batches {
+                            let mut builder = IndexDocuments::new(
+                                &mut wtxn,
+                                &index,
+                                &indexer_config,
+                                index_documents_config.clone(),
+                                |_| (),
+                                || false,
+                            )
+                            .unwrap();
+
+                            for op in batch.0 {
+                                match op {
+                                    Operation::AddDoc(doc) => {
+                                        let documents =
+                                            milli::documents::objects_from_json_value(doc.to_d());
+                                        let documents =
+                                            milli::documents::documents_batch_reader_from_objects(
+                                                documents,
+                                            );
+                                        let (b, _added) = builder.add_documents(documents).unwrap();
+                                        builder = b;
+                                    }
+                                    Operation::DeleteDoc(id) => {
+                                        let (b, _removed) =
+                                            builder.remove_documents(vec![id.to_s()]).unwrap();
+                                        builder = b;
+                                    }
+                                }
+                            }
+                            builder.execute().unwrap();
+
+                            // after executing a batch we check if the database is corrupted
+                            let res = index.search(&wtxn).execute().unwrap();
+                            index.documents(&wtxn, res.documents_ids).unwrap();
+                            progression.fetch_add(1, Ordering::Relaxed);
+                        }
+                        wtxn.abort().unwrap();
+                    });
+                    if let err @ Err(_) = handle.join() {
+                        stop.store(true, Ordering::Relaxed);
+                        err.expect(&dbg_input);
+                    }
+                }
+            });
+        });
+        handles.push(handle);
+    }
+
+    std::thread::spawn(|| {
+        let mut last_value = 0;
+        let start = std::time::Instant::now();
+        loop {
+            let total = progression.load(Ordering::Relaxed);
+            let elapsed = start.elapsed().as_secs();
+            if elapsed > 3600 {
+                // after 1 hour, stop the fuzzer, success
+                std::process::exit(0);
+            }
+            println!(
+                "Has been running for {:?} seconds. Tested {} new values for a total of {}.",
+                elapsed,
+                total - last_value,
+                total
+            );
+            last_value = total;
+            std::thread::sleep(Duration::from_secs(1));
+        }
+    });
+
+    for handle in handles {
+        handle.join().unwrap();
+    }
+}
--- a/fuzzers/src/lib.rs
+++ b/fuzzers/src/lib.rs
@ -0,0 +1,46 @@
+use arbitrary::Arbitrary;
+use serde_json::{json, Value};
+
+#[derive(Debug, Arbitrary)]
+pub enum Document {
+    One,
+    Two,
+    Three,
+    Four,
+    Five,
+    Six,
+}
+
+impl Document {
+    pub fn to_d(&self) -> Value {
+        match self {
+            Document::One => json!({ "id": 0, "doggo": "bernese" }),
+            Document::Two => json!({ "id": 0, "doggo": "golden" }),
+            Document::Three => json!({ "id": 0, "catto": "jorts" }),
+            Document::Four => json!({ "id": 1, "doggo": "bernese" }),
+            Document::Five => json!({ "id": 1, "doggo": "golden" }),
+            Document::Six => json!({ "id": 1, "catto": "jorts" }),
+        }
+    }
+}
+
+#[derive(Debug, Arbitrary)]
+pub enum DocId {
+    Zero,
+    One,
+}
+
+impl DocId {
+    pub fn to_s(&self) -> String {
+        match self {
+            DocId::Zero => "0".to_string(),
+            DocId::One => "1".to_string(),
+        }
+    }
+}
+
+#[derive(Debug, Arbitrary)]
+pub enum Operation {
+    AddDoc(Document),
+    DeleteDoc(DocId),
+}
--- a/grafana-dashboards/dashboard.json
+++ b/grafana-dashboards/dashboard.json
--- a/index-scheduler/src/autobatcher.rs
+++ b/index-scheduler/src/autobatcher.rs
@ -25,6 +25,7 @@ enum AutobatchKind {
        primary_key: Option<String>,
    },
    DocumentDeletion,
+    DocumentDeletionByFilter,
    DocumentClear,
    Settings {
        allow_index_creation: bool,
@ -64,6 +65,9 @@ impl From<KindWithContent> for AutobatchKind {
            } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key },
            KindWithContent::DocumentDeletion { .. } => AutobatchKind::DocumentDeletion,
            KindWithContent::DocumentClear { .. } => AutobatchKind::DocumentClear,
+            KindWithContent::DocumentDeletionByFilter { .. } => {
+                AutobatchKind::DocumentDeletionByFilter
+            }
            KindWithContent::SettingsUpdate { allow_index_creation, is_deletion, .. } => {
                AutobatchKind::Settings {
                    allow_index_creation: allow_index_creation && !is_deletion,
@ -97,6 +101,9 @@ pub enum BatchKind {
    DocumentDeletion {
        deletion_ids: Vec<TaskId>,
    },
+    DocumentDeletionByFilter {
+        id: TaskId,
+    },
    ClearAndSettings {
        other: Vec<TaskId>,
        allow_index_creation: bool,
@ -153,7 +160,7 @@ impl BatchKind {
 impl BatchKind {
    /// Returns a `ControlFlow::Break` if you must stop right now.
    /// The boolean tell you if an index has been created by the batched task.
-    /// To ease the writting of the code. `true` can be returned when you don't need to create an index
+    /// To ease the writing of the code. `true` can be returned when you don't need to create an index
    /// but false can't be returned if you needs to create an index.
    // TODO use an AutoBatchKind as input
    pub fn new(
@ -195,6 +202,9 @@ impl BatchKind {
            K::DocumentDeletion => {
                (Continue(BatchKind::DocumentDeletion { deletion_ids: vec![task_id] }), false)
            }
+            K::DocumentDeletionByFilter => {
+                (Break(BatchKind::DocumentDeletionByFilter { id: task_id }), false)
+            }
            K::Settings { allow_index_creation } => (
                Continue(BatchKind::Settings { allow_index_creation, settings_ids: vec![task_id] }),
                allow_index_creation,
@ -204,7 +214,7 @@ impl BatchKind {

    /// Returns a `ControlFlow::Break` if you must stop right now.
    /// The boolean tell you if an index has been created by the batched task.
-    /// To ease the writting of the code. `true` can be returned when you don't need to create an index
+    /// To ease the writing of the code. `true` can be returned when you don't need to create an index
    /// but false can't be returned if you needs to create an index.
    #[rustfmt::skip]
    fn accumulate(self, id: TaskId, kind: AutobatchKind, index_already_exists: bool, primary_key: Option<&str>) -> ControlFlow<BatchKind, BatchKind> {
@ -212,7 +222,7 @@ impl BatchKind {

        match (self, kind) {
            // We don't batch any of these operations
-            (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap) => Break(this),
+            (this, K::IndexCreation | K::IndexUpdate | K::IndexSwap | K::DocumentDeletionByFilter) => Break(this),
            // We must not batch tasks that don't have the same index creation rights if the index doesn't already exists.
            (this, kind) if !index_already_exists && this.allow_index_creation() == Some(false) && kind.allow_index_creation() == Some(true) => {
                Break(this)
@ -311,9 +321,18 @@ impl BatchKind {
                })
            }
            (
-                this @ BatchKind::DocumentOperation { .. },
+                BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
                K::DocumentDeletion,
-            ) => Break(this),
+            ) => {
+                operation_ids.push(id);
+
+                Continue(BatchKind::DocumentOperation {
+                    method,
+                    allow_index_creation,
+                    primary_key,
+                    operation_ids,
+                })
+            }
            // but we can't autobatch documents if it's not the same kind
            // this match branch MUST be AFTER the previous one
            (
@ -336,7 +355,35 @@ impl BatchKind {
                deletion_ids.push(id);
                Continue(BatchKind::DocumentClear { ids: deletion_ids })
            }
-            // we can't autobatch a deletion and an import
+            // we can autobatch the deletion and import if the index already exists
+            (
+                BatchKind::DocumentDeletion { mut deletion_ids },
+                K::DocumentImport { method, allow_index_creation, primary_key }
+            ) if index_already_exists => {
+                deletion_ids.push(id);
+
+                Continue(BatchKind::DocumentOperation {
+                    method,
+                    allow_index_creation,
+                    primary_key,
+                    operation_ids: deletion_ids,
+                })
+            }
+            // we can autobatch the deletion and import if both can't create an index
+            (
+                BatchKind::DocumentDeletion { mut deletion_ids },
+                K::DocumentImport { method, allow_index_creation, primary_key }
+            ) if !allow_index_creation => {
+                deletion_ids.push(id);
+
+                Continue(BatchKind::DocumentOperation {
+                    method,
+                    allow_index_creation,
+                    primary_key,
+                    operation_ids: deletion_ids,
+                })
+            }
+            // we can't autobatch a deletion and an import if the index does not exist but would be created by an addition
            (
                this @ BatchKind::DocumentDeletion { .. },
                K::DocumentImport { .. }
@ -471,7 +518,8 @@ impl BatchKind {
                BatchKind::IndexCreation { .. }
                | BatchKind::IndexDeletion { .. }
                | BatchKind::IndexUpdate { .. }
-                | BatchKind::IndexSwap { .. },
+                | BatchKind::IndexSwap { .. }
+                | BatchKind::DocumentDeletionByFilter { .. },
                _,
            ) => {
                unreachable!()
@ -637,36 +685,36 @@ mod tests {
        debug_snapshot!(autobatch_from(false,None,  [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
        debug_snapshot!(autobatch_from(false,None,  [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");

-        // We can't autobatch document addition with document deletion
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###);
-        // we also can't do the only way around
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+        // We can autobatch document addition with document deletion
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        // And the other way around
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
+        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
    }

    #[test]
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@ -24,13 +24,15 @@ use std::io::BufWriter;

 use dump::IndexMetadata;
 use log::{debug, error, info};
+use meilisearch_types::error::Code;
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
 use meilisearch_types::milli::heed::CompactionOption;
 use meilisearch_types::milli::update::{
-    DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
+    DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
+    Settings as MilliSettings,
 };
-use meilisearch_types::milli::{self, BEU32};
+use meilisearch_types::milli::{self, Filter, BEU32};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
 use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -65,6 +67,10 @@ pub(crate) enum Batch {
        op: IndexOperation,
        must_create_index: bool,
    },
+    IndexDocumentDeletionByFilter {
+        index_uid: String,
+        task: Task,
+    },
    IndexCreation {
        index_uid: String,
        primary_key: Option<String>,
@ -149,6 +155,7 @@ impl Batch {
            | Batch::TaskDeletion(task)
            | Batch::Dump(task)
            | Batch::IndexCreation { task, .. }
+            | Batch::IndexDocumentDeletionByFilter { task, .. }
            | Batch::IndexUpdate { task, .. } => vec![task.uid],
            Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
                tasks.iter().map(|task| task.uid).collect()
@ -187,7 +194,8 @@ impl Batch {
            IndexOperation { op, .. } => Some(op.index_uid()),
            IndexCreation { index_uid, .. }
            | IndexUpdate { index_uid, .. }
-            | IndexDeletion { index_uid, .. } => Some(index_uid),
+            | IndexDeletion { index_uid, .. }
+            | IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
        }
    }
 }
@ -227,6 +235,18 @@ impl IndexScheduler {
                },
                must_create_index,
            })),
+            BatchKind::DocumentDeletionByFilter { id } => {
+                let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
+                match &task.kind {
+                    KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
+                        Ok(Some(Batch::IndexDocumentDeletionByFilter {
+                            index_uid: index_uid.clone(),
+                            task,
+                        }))
+                    }
+                    _ => unreachable!(),
+                }
+            }
            BatchKind::DocumentOperation { method, operation_ids, .. } => {
                let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
                let primary_key = tasks
@ -867,6 +887,51 @@ impl IndexScheduler {

                Ok(tasks)
            }
+            Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
+                let (index_uid, filter) =
+                    if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
+                        &task.kind
+                    {
+                        (index_uid, filter_expr)
+                    } else {
+                        unreachable!()
+                    };
+                let index = {
+                    let rtxn = self.env.read_txn()?;
+                    self.index_mapper.index(&rtxn, index_uid)?
+                };
+                let deleted_documents = delete_document_by_filter(filter, index);
+                let original_filter = if let Some(Details::DocumentDeletionByFilter {
+                    original_filter,
+                    deleted_documents: _,
+                }) = task.details
+                {
+                    original_filter
+                } else {
+                    // In the case of a `documentDeleteByFilter` the details MUST be set
+                    unreachable!();
+                };
+
+                match deleted_documents {
+                    Ok(deleted_documents) => {
+                        task.status = Status::Succeeded;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(deleted_documents),
+                        });
+                    }
+                    Err(e) => {
+                        task.status = Status::Failed;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(0),
+                        });
+                        task.error = Some(e.into());
+                    }
+                }
+
+                Ok(vec![task])
+            }
            Batch::IndexCreation { index_uid, primary_key, task } => {
                let wtxn = self.env.write_txn()?;
                if self.index_mapper.exists(&wtxn, &index_uid)? {
@ -933,7 +998,7 @@ impl IndexScheduler {
                }()
                .unwrap_or_default();

-                // The write transaction is directly owned and commited inside.
+                // The write transaction is directly owned and committed inside.
                match self.index_mapper.delete_index(wtxn, &index_uid) {
                    Ok(()) => (),
                    Err(Error::IndexNotFound(_)) if index_has_been_created => (),
@ -1421,3 +1486,25 @@ impl IndexScheduler {
        Ok(content_files_to_delete)
    }
 }
+
+fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
+    let filter = Filter::from_json(filter)?;
+    Ok(if let Some(filter) = filter {
+        let mut wtxn = index.write_txn()?;
+
+        let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
+            milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
+                Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
+            }
+            e => e.into(),
+        })?;
+        let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
+        delete_operation.delete_documents(&candidates);
+        let deleted_documents =
+            delete_operation.execute().map(|result| result.deleted_documents)?;
+        wtxn.commit()?;
+        deleted_documents
+    } else {
+        0
+    })
+}
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@ -46,6 +46,8 @@ impl From<DateField> for Code {
 #[allow(clippy::large_enum_variant)]
 #[derive(Error, Debug)]
 pub enum Error {
+    #[error("{1}")]
+    WithCustomErrorCode(Code, Box<Self>),
    #[error("Index `{0}` not found.")]
    IndexNotFound(String),
    #[error("Index `{0}` already exists.")]
@ -134,11 +136,59 @@ pub enum Error {
    TaskDatabaseUpdate(Box<Self>),
    #[error(transparent)]
    HeedTransaction(heed::Error),
+
+    #[cfg(test)]
+    #[error("Planned failure for tests.")]
+    PlannedFailure,
+}
+
+impl Error {
+    pub fn is_recoverable(&self) -> bool {
+        match self {
+            Error::IndexNotFound(_)
+            | Error::WithCustomErrorCode(_, _)
+            | Error::IndexAlreadyExists(_)
+            | Error::SwapDuplicateIndexFound(_)
+            | Error::SwapDuplicateIndexesFound(_)
+            | Error::SwapIndexNotFound(_)
+            | Error::NoSpaceLeftInTaskQueue
+            | Error::SwapIndexesNotFound(_)
+            | Error::CorruptedDump
+            | Error::InvalidTaskDate { .. }
+            | Error::InvalidTaskUids { .. }
+            | Error::InvalidTaskStatuses { .. }
+            | Error::InvalidTaskTypes { .. }
+            | Error::InvalidTaskCanceledBy { .. }
+            | Error::InvalidIndexUid { .. }
+            | Error::TaskNotFound(_)
+            | Error::TaskDeletionWithEmptyQuery
+            | Error::TaskCancelationWithEmptyQuery
+            | Error::Dump(_)
+            | Error::Heed(_)
+            | Error::Milli(_)
+            | Error::ProcessBatchPanicked
+            | Error::FileStore(_)
+            | Error::IoError(_)
+            | Error::Persist(_)
+            | Error::Anyhow(_) => true,
+            Error::CreateBatch(_)
+            | Error::CorruptedTaskQueue
+            | Error::TaskDatabaseUpdate(_)
+            | Error::HeedTransaction(_) => false,
+            #[cfg(test)]
+            Error::PlannedFailure => false,
+        }
+    }
+
+    pub fn with_custom_error_code(self, code: Code) -> Self {
+        Self::WithCustomErrorCode(code, Box::new(self))
+    }
 }

 impl ErrorCode for Error {
    fn error_code(&self) -> Code {
        match self {
+            Error::WithCustomErrorCode(code, _) => *code,
            Error::IndexNotFound(_) => Code::IndexNotFound,
            Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
            Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
@ -171,6 +221,9 @@ impl ErrorCode for Error {
            Error::CorruptedDump => Code::Internal,
            Error::TaskDatabaseUpdate(_) => Code::Internal,
            Error::CreateBatch(_) => Code::Internal,
+
+            #[cfg(test)]
+            Error::PlannedFailure => Code::Internal,
        }
    }
 }
--- a/index-scheduler/src/index_mapper/index_map.rs
+++ b/index-scheduler/src/index_mapper/index_map.rs
@ -5,6 +5,7 @@ use std::collections::BTreeMap;
 use std::path::Path;
 use std::time::Duration;

+use meilisearch_types::heed::flags::Flags;
 use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
 use meilisearch_types::milli::Index;
 use time::OffsetDateTime;
@ -53,6 +54,7 @@ pub struct IndexMap {
 pub struct ClosingIndex {
    uuid: Uuid,
    closing_event: EnvClosingEvent,
+    enable_mdb_writemap: bool,
    map_size: usize,
    generation: usize,
 }
@ -68,6 +70,7 @@ impl ClosingIndex {
    pub fn wait_timeout(self, timeout: Duration) -> Option<ReopenableIndex> {
        self.closing_event.wait_timeout(timeout).then_some(ReopenableIndex {
            uuid: self.uuid,
+            enable_mdb_writemap: self.enable_mdb_writemap,
            map_size: self.map_size,
            generation: self.generation,
        })
@ -76,6 +79,7 @@ impl ClosingIndex {

 pub struct ReopenableIndex {
    uuid: Uuid,
+    enable_mdb_writemap: bool,
    map_size: usize,
    generation: usize,
 }
@ -103,7 +107,7 @@ impl ReopenableIndex {
                return Ok(());
            }
            map.unavailable.remove(&self.uuid);
-            map.create(&self.uuid, path, None, self.map_size)?;
+            map.create(&self.uuid, path, None, self.enable_mdb_writemap, self.map_size)?;
        }
        Ok(())
    }
@ -170,16 +174,17 @@ impl IndexMap {
        uuid: &Uuid,
        path: &Path,
        date: Option<(OffsetDateTime, OffsetDateTime)>,
+        enable_mdb_writemap: bool,
        map_size: usize,
    ) -> Result<Index> {
        if !matches!(self.get_unavailable(uuid), Missing) {
            panic!("Attempt to open an index that was unavailable");
        }
-        let index = create_or_open_index(path, date, map_size)?;
+        let index = create_or_open_index(path, date, enable_mdb_writemap, map_size)?;
        match self.available.insert(*uuid, index.clone()) {
            InsertionOutcome::InsertedNew => (),
            InsertionOutcome::Evicted(evicted_uuid, evicted_index) => {
-                self.close(evicted_uuid, evicted_index, 0);
+                self.close(evicted_uuid, evicted_index, enable_mdb_writemap, 0);
            }
            InsertionOutcome::Replaced(_) => {
                panic!("Attempt to open an index that was already opened")
@ -212,17 +217,30 @@ impl IndexMap {
    /// | Closing         | Closing       |
    /// | Available       | Closing       |
    ///
-    pub fn close_for_resize(&mut self, uuid: &Uuid, map_size_growth: usize) {
+    pub fn close_for_resize(
+        &mut self,
+        uuid: &Uuid,
+        enable_mdb_writemap: bool,
+        map_size_growth: usize,
+    ) {
        let Some(index) = self.available.remove(uuid) else { return; };
-        self.close(*uuid, index, map_size_growth);
+        self.close(*uuid, index, enable_mdb_writemap, map_size_growth);
    }

-    fn close(&mut self, uuid: Uuid, index: Index, map_size_growth: usize) {
+    fn close(
+        &mut self,
+        uuid: Uuid,
+        index: Index,
+        enable_mdb_writemap: bool,
+        map_size_growth: usize,
+    ) {
        let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
        let closing_event = index.prepare_for_closing();
        let generation = self.next_generation();
-        self.unavailable
-            .insert(uuid, Some(ClosingIndex { uuid, closing_event, map_size, generation }));
+        self.unavailable.insert(
+            uuid,
+            Some(ClosingIndex { uuid, closing_event, enable_mdb_writemap, map_size, generation }),
+        );
    }

    /// Attempts to delete and index.
@ -282,11 +300,15 @@ impl IndexMap {
 fn create_or_open_index(
    path: &Path,
    date: Option<(OffsetDateTime, OffsetDateTime)>,
+    enable_mdb_writemap: bool,
    map_size: usize,
 ) -> Result<Index> {
    let mut options = EnvOpenOptions::new();
    options.map_size(clamp_to_page_size(map_size));
    options.max_readers(1024);
+    if enable_mdb_writemap {
+        unsafe { options.flag(Flags::MdbWriteMap) };
+    }

    if let Some((created, updated)) = date {
        Ok(Index::new_with_creation_dates(options, path, created, updated)?)
--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@ -66,6 +66,8 @@ pub struct IndexMapper {
    index_base_map_size: usize,
    /// The quantity by which the map size of an index is incremented upon reopening, in bytes.
    index_growth_amount: usize,
+    /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
+    enable_mdb_writemap: bool,
    pub indexer_config: Arc<IndexerConfig>,
 }

@ -88,8 +90,17 @@ pub enum IndexStatus {
 pub struct IndexStats {
    /// Number of documents in the index.
    pub number_of_documents: u64,
-    /// Size of the index' DB, in bytes.
+    /// Size taken up by the index' DB, in bytes.
+    ///
+    /// This includes the size taken by both the used and free pages of the DB, and as the free pages
+    /// are not returned to the disk after a deletion, this number is typically larger than
+    /// `used_database_size` that only includes the size of the used pages.
    pub database_size: u64,
+    /// Size taken by the used pages of the index' DB, in bytes.
+    ///
+    /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
+    /// this value is typically smaller than `database_size`.
+    pub used_database_size: u64,
    /// Association of every field name with the number of times it occurs in the documents.
    pub field_distribution: FieldDistribution,
    /// Creation date of the index.
@ -105,10 +116,10 @@ impl IndexStats {
    ///
    /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
    pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
-        let database_size = index.on_disk_size()?;
        Ok(IndexStats {
            number_of_documents: index.number_of_documents(rtxn)?,
-            database_size,
+            database_size: index.on_disk_size()?,
+            used_database_size: index.used_size()?,
            field_distribution: index.field_distribution(rtxn)?,
            created_at: index.created_at(rtxn)?,
            updated_at: index.updated_at(rtxn)?,
@ -123,15 +134,22 @@ impl IndexMapper {
        index_base_map_size: usize,
        index_growth_amount: usize,
        index_count: usize,
+        enable_mdb_writemap: bool,
        indexer_config: IndexerConfig,
    ) -> Result<Self> {
+        let mut wtxn = env.write_txn()?;
+        let index_mapping = env.create_database(&mut wtxn, Some(INDEX_MAPPING))?;
+        let index_stats = env.create_database(&mut wtxn, Some(INDEX_STATS))?;
+        wtxn.commit()?;
+
        Ok(Self {
            index_map: Arc::new(RwLock::new(IndexMap::new(index_count))),
-            index_mapping: env.create_database(Some(INDEX_MAPPING))?,
-            index_stats: env.create_database(Some(INDEX_STATS))?,
+            index_mapping,
+            index_stats,
            base_path,
            index_base_map_size,
            index_growth_amount,
+            enable_mdb_writemap,
            indexer_config: Arc::new(indexer_config),
        })
    }
@ -162,6 +180,7 @@ impl IndexMapper {
                    &uuid,
                    &index_path,
                    date,
+                    self.enable_mdb_writemap,
                    self.index_base_map_size,
                )?;

@ -273,7 +292,11 @@ impl IndexMapper {
            .ok_or_else(|| Error::IndexNotFound(name.to_string()))?;

        // We remove the index from the in-memory index map.
-        self.index_map.write().unwrap().close_for_resize(&uuid, self.index_growth_amount);
+        self.index_map.write().unwrap().close_for_resize(
+            &uuid,
+            self.enable_mdb_writemap,
+            self.index_growth_amount,
+        );

        Ok(())
    }
@ -338,6 +361,7 @@ impl IndexMapper {
                                &uuid,
                                &index_path,
                                None,
+                                self.enable_mdb_writemap,
                                self.index_base_map_size,
                            )?;
                        }
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@ -184,6 +184,9 @@ fn snapshot_details(d: &Details) -> String {
            provided_ids: received_document_ids,
            deleted_documents,
        } => format!("{{ received_document_ids: {received_document_ids}, deleted_documents: {deleted_documents:?} }}"),
+        Details::DocumentDeletionByFilter { original_filter, deleted_documents } => format!(
+           "{{ original_filter: {original_filter}, deleted_documents: {deleted_documents:?} }}"
+        ),
        Details::ClearAll { deleted_documents } => {
            format!("{{ deleted_documents: {deleted_documents:?} }}")
        },
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -31,7 +31,7 @@ mod uuid_codec;
 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;

-use std::collections::HashMap;
+use std::collections::{BTreeMap, HashMap};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@ -233,6 +233,8 @@ pub struct IndexSchedulerOptions {
    pub task_db_size: usize,
    /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
    pub index_base_map_size: usize,
+    /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
+    pub enable_mdb_writemap: bool,
    /// The size, in bytes, by which the map size of an index is increased when it resized due to being full.
    pub index_growth_amount: usize,
    /// The number of indexes that can be concurrently opened in memory.
@ -374,6 +376,11 @@ impl IndexScheduler {
        std::fs::create_dir_all(&options.indexes_path)?;
        std::fs::create_dir_all(&options.dumps_path)?;

+        if cfg!(windows) && options.enable_mdb_writemap {
+            // programmer error if this happens: in normal use passing the option on Windows is an error in main
+            panic!("Windows doesn't support the MDB_WRITEMAP LMDB option");
+        }
+
        let task_db_size = clamp_to_page_size(options.task_db_size);
        let budget = if options.indexer_config.skip_index_budget {
            IndexBudget {
@ -396,25 +403,37 @@ impl IndexScheduler {
            .open(options.tasks_path)?;
        let file_store = FileStore::new(&options.update_file_path)?;

+        let mut wtxn = env.write_txn()?;
+        let all_tasks = env.create_database(&mut wtxn, Some(db_name::ALL_TASKS))?;
+        let status = env.create_database(&mut wtxn, Some(db_name::STATUS))?;
+        let kind = env.create_database(&mut wtxn, Some(db_name::KIND))?;
+        let index_tasks = env.create_database(&mut wtxn, Some(db_name::INDEX_TASKS))?;
+        let canceled_by = env.create_database(&mut wtxn, Some(db_name::CANCELED_BY))?;
+        let enqueued_at = env.create_database(&mut wtxn, Some(db_name::ENQUEUED_AT))?;
+        let started_at = env.create_database(&mut wtxn, Some(db_name::STARTED_AT))?;
+        let finished_at = env.create_database(&mut wtxn, Some(db_name::FINISHED_AT))?;
+        wtxn.commit()?;
+
        // allow unreachable_code to get rids of the warning in the case of a test build.
        let this = Self {
            must_stop_processing: MustStopProcessing::default(),
            processing_tasks: Arc::new(RwLock::new(ProcessingTasks::new())),
            file_store,
-            all_tasks: env.create_database(Some(db_name::ALL_TASKS))?,
-            status: env.create_database(Some(db_name::STATUS))?,
-            kind: env.create_database(Some(db_name::KIND))?,
-            index_tasks: env.create_database(Some(db_name::INDEX_TASKS))?,
-            canceled_by: env.create_database(Some(db_name::CANCELED_BY))?,
-            enqueued_at: env.create_database(Some(db_name::ENQUEUED_AT))?,
-            started_at: env.create_database(Some(db_name::STARTED_AT))?,
-            finished_at: env.create_database(Some(db_name::FINISHED_AT))?,
+            all_tasks,
+            status,
+            kind,
+            index_tasks,
+            canceled_by,
+            enqueued_at,
+            started_at,
+            finished_at,
            index_mapper: IndexMapper::new(
                &env,
                options.indexes_path,
                budget.map_size,
                options.index_growth_amount,
                budget.index_count,
+                options.enable_mdb_writemap,
                options.indexer_config,
            )?,
            env,
@ -540,13 +559,7 @@ impl IndexScheduler {
                        Err(e) => {
                            log::error!("{}", e);
                            // Wait one second when an irrecoverable error occurs.
-                            if matches!(
-                                e,
-                                Error::CorruptedTaskQueue
-                                    | Error::TaskDatabaseUpdate(_)
-                                    | Error::HeedTransaction(_)
-                                    | Error::CreateBatch(_)
-                            ) {
+                            if !e.is_recoverable() {
                                std::thread::sleep(Duration::from_secs(1));
                            }
                        }
@ -560,10 +573,16 @@ impl IndexScheduler {
        &self.index_mapper.indexer_config
    }

+    /// Return the real database size (i.e. the size **with** the free pages).
    pub fn size(&self) -> Result<u64> {
        Ok(self.env.real_disk_size()?)
    }

+    /// Return the used database size (i.e. the size **without** the free pages).
+    pub fn used_size(&self) -> Result<u64> {
+        Ok(self.env.non_free_pages_size()?)
+    }
+
    /// Return the index corresponding to the name.
    ///
    /// * If the index wasn't opened before, the index will be opened.
@ -743,6 +762,38 @@ impl IndexScheduler {
        Ok(tasks)
    }

+    /// The returned structure contains:
+    /// 1. The name of the property being observed: `statuses`, `types`, or `indexes`.
+    /// 2. The name of the specific entry within that property: for example, `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of an index for the `indexes`.
+    /// 3. The number of times that entry appeared.
+    pub fn get_stats(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
+        let rtxn = self.read_txn()?;
+
+        let mut res = BTreeMap::new();
+
+        res.insert(
+            "statuses".to_string(),
+            enum_iterator::all::<Status>()
+                .map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
+                .collect::<Result<BTreeMap<String, u64>>>()?,
+        );
+        res.insert(
+            "types".to_string(),
+            enum_iterator::all::<Kind>()
+                .map(|s| Ok((s.to_string(), self.get_kind(&rtxn, s)?.len())))
+                .collect::<Result<BTreeMap<String, u64>>>()?,
+        );
+        res.insert(
+            "indexes".to_string(),
+            self.index_tasks
+                .iter(&rtxn)?
+                .map(|res| Ok(res.map(|(name, bitmap)| (name.to_string(), bitmap.len()))?))
+                .collect::<Result<BTreeMap<String, u64>>>()?,
+        );
+
+        Ok(res)
+    }
+
    /// Return true iff there is at least one task associated with this index
    /// that is processing.
    pub fn is_index_processing(&self, index: &str) -> Result<bool> {
@ -1270,6 +1321,12 @@ impl<'a> Dump<'a> {
                    documents_ids,
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
+                KindDump::DocumentDeletionByFilter { filter } => {
+                    KindWithContent::DocumentDeletionByFilter {
+                        filter_expr: filter,
+                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    }
+                }
                KindDump::DocumentClear => KindWithContent::DocumentClear {
                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
                },
@ -1471,6 +1528,7 @@ mod tests {
                dumps_path: tempdir.path().join("dumps"),
                task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
+                enable_mdb_writemap: false,
                index_growth_amount: 1000 * 1000, // 1 MB
                index_count: 5,
                indexer_config,
@ -1500,7 +1558,7 @@ mod tests {
            (index_scheduler, index_scheduler_handle)
        }

-        /// Return a [`CorruptedTaskQueue`](Error::CorruptedTaskQueue) error if a failure is planned
+        /// Return a [`PlannedFailure`](Error::PlannedFailure) error if a failure is planned
        /// for the given location and current run loop iteration.
        pub fn maybe_fail(&self, location: FailureLocation) -> Result<()> {
            if self.planned_failures.contains(&(*self.run_loop_iteration.read().unwrap(), location))
@ -1509,7 +1567,7 @@ mod tests {
                    FailureLocation::PanicInsideProcessBatch => {
                        panic!("simulated panic")
                    }
-                    _ => Err(Error::CorruptedTaskQueue),
+                    _ => Err(Error::PlannedFailure),
                }
            } else {
                Ok(())
@ -1727,7 +1785,7 @@ mod tests {
            assert_eq!(task.kind.as_kind(), k);
        }

-        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_succesfully_registered");
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "everything_is_successfully_registered");
    }

    #[test]
@ -2017,6 +2075,105 @@ mod tests {
        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
    }

+    #[test]
+    fn document_addition_and_document_deletion() {
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+        let content = r#"[
+            { "id": 1, "doggo": "jean bob" },
+            { "id": 2, "catto": "jorts" },
+            { "id": 3, "doggo": "bork" }
+        ]"#;
+
+        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        file.persist().unwrap();
+        index_scheduler
+            .register(KindWithContent::DocumentAdditionOrUpdate {
+                index_uid: S("doggos"),
+                primary_key: Some(S("id")),
+                method: ReplaceDocuments,
+                content_file: uuid,
+                documents_count,
+                allow_index_creation: true,
+            })
+            .unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+        index_scheduler
+            .register(KindWithContent::DocumentDeletion {
+                index_uid: S("doggos"),
+                documents_ids: vec![S("1"), S("2")],
+            })
+            .unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
+
+        handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
+
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
+    }
+
+    #[test]
+    fn document_deletion_and_document_addition() {
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+        index_scheduler
+            .register(KindWithContent::DocumentDeletion {
+                index_uid: S("doggos"),
+                documents_ids: vec![S("1"), S("2")],
+            })
+            .unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
+
+        let content = r#"[
+            { "id": 1, "doggo": "jean bob" },
+            { "id": 2, "catto": "jorts" },
+            { "id": 3, "doggo": "bork" }
+        ]"#;
+
+        let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
+        let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
+        file.persist().unwrap();
+        index_scheduler
+            .register(KindWithContent::DocumentAdditionOrUpdate {
+                index_uid: S("doggos"),
+                primary_key: Some(S("id")),
+                method: ReplaceDocuments,
+                content_file: uuid,
+                documents_count,
+                allow_index_creation: true,
+            })
+            .unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
+
+        // The deletion should have failed because it can't create an index
+        handle.advance_one_failed_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
+
+        // The addition should work
+        handle.advance_one_successful_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
+
+        let index = index_scheduler.index("doggos").unwrap();
+        let rtxn = index.read_txn().unwrap();
+        let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
+        let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let documents = index
+            .all_documents(&rtxn)
+            .unwrap()
+            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .collect::<Vec<_>>();
+        snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
+    }
+
    #[test]
    fn do_not_batch_task_of_different_indexes() {
        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
--- a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/after_processing_the_batch.snap
@ -0,0 +1,43 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
+1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
+----------------------------------------------------------------------
+### Status:
+enqueued []
+succeeded [0,1,]
+----------------------------------------------------------------------
+### Kind:
+"documentAdditionOrUpdate" [0,]
+"documentDeletion" [1,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,1,]
+----------------------------------------------------------------------
+### Index Mapper:
+doggos: { number_of_documents: 1, field_distribution: {"doggo": 1, "id": 1} }
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+[timestamp] [0,1,]
+----------------------------------------------------------------------
+### Finished At:
+[timestamp] [0,1,]
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/documents.snap
@ -0,0 +1,9 @@
+---
+source: index-scheduler/src/lib.rs
+---
+[
+  {
+    "id": 3,
+    "doggo": "bork"
+  }
+]
--- a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_first_task.snap
@ -0,0 +1,37 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,]
+----------------------------------------------------------------------
+### Kind:
+"documentAdditionOrUpdate" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,]
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+00000000-0000-0000-0000-000000000000
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_addition_and_document_deletion/registered_the_second_task.snap
@ -0,0 +1,40 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
+1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,1,]
+----------------------------------------------------------------------
+### Kind:
+"documentAdditionOrUpdate" [0,]
+"documentDeletion" [1,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,1,]
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+00000000-0000-0000-0000-000000000000
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_failing_the_deletion.snap
@ -0,0 +1,43 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
+1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
+----------------------------------------------------------------------
+### Status:
+enqueued [1,]
+failed [0,]
+----------------------------------------------------------------------
+### Kind:
+"documentAdditionOrUpdate" [1,]
+"documentDeletion" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,1,]
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Finished At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### File Store:
+00000000-0000-0000-0000-000000000000
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/after_last_successful_addition.snap
@ -0,0 +1,46 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
+1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
+----------------------------------------------------------------------
+### Status:
+enqueued []
+succeeded [1,]
+failed [0,]
+----------------------------------------------------------------------
+### Kind:
+"documentAdditionOrUpdate" [1,]
+"documentDeletion" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,1,]
+----------------------------------------------------------------------
+### Index Mapper:
+doggos: { number_of_documents: 3, field_distribution: {"catto": 1, "doggo": 2, "id": 3} }
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Finished At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/documents.snap
@ -0,0 +1,17 @@
+---
+source: index-scheduler/src/lib.rs
+---
+[
+  {
+    "id": 1,
+    "doggo": "jean bob"
+  },
+  {
+    "id": 2,
+    "catto": "jorts"
+  },
+  {
+    "id": 3,
+    "doggo": "bork"
+  }
+]
--- a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_first_task.snap
@ -0,0 +1,36 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,]
+----------------------------------------------------------------------
+### Kind:
+"documentDeletion" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,]
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap
+++ b/index-scheduler/src/snapshots/lib.rs/document_deletion_and_document_addition/registered_the_second_task.snap
@ -0,0 +1,40 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
+1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,1,]
+----------------------------------------------------------------------
+### Kind:
+"documentAdditionOrUpdate" [1,]
+"documentDeletion" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+doggos [0,1,]
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+00000000-0000-0000-0000-000000000000
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_document_addition/document_addition_failed.snap
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
+0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/fail_in_process_batch_for_index_creation/index_creation_failed.snap
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
+0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap
+++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_simple/end.snap
@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
 ### All Tasks:
 0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 1 {uid: 1, status: succeeded, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
-2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Corrupted task queue.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
+2 {uid: 2, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { primary_key: Some("fish") }, kind: IndexCreation { index_uid: "whalo", primary_key: Some("fish") }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap
+++ b/index-scheduler/src/snapshots/lib.rs/register/everything_is_successfully_registered.snap
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@ -239,6 +239,7 @@ pub fn swap_index_uid_in_task(task: &mut Task, swap: (&str, &str)) {
    match &mut task.kind {
        K::DocumentAdditionOrUpdate { index_uid, .. } => index_uids.push(index_uid),
        K::DocumentDeletion { index_uid, .. } => index_uids.push(index_uid),
+        K::DocumentDeletionByFilter { index_uid, .. } => index_uids.push(index_uid),
        K::DocumentClear { index_uid } => index_uids.push(index_uid),
        K::SettingsUpdate { index_uid, .. } => index_uids.push(index_uid),
        K::IndexDeletion { index_uid } => index_uids.push(index_uid),
@ -464,6 +465,29 @@ impl IndexScheduler {
                            }
                        }
                    }
+                    Details::DocumentDeletionByFilter { deleted_documents, original_filter: _ } => {
+                        assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
+                        let (index_uid, _) = if let KindWithContent::DocumentDeletionByFilter {
+                            ref index_uid,
+                            ref filter_expr,
+                        } = kind
+                        {
+                            (index_uid, filter_expr)
+                        } else {
+                            unreachable!()
+                        };
+                        assert_eq!(&task_index_uid.unwrap(), index_uid);
+
+                        match status {
+                            Status::Enqueued | Status::Processing => (),
+                            Status::Succeeded => {
+                                assert!(deleted_documents.is_some());
+                            }
+                            Status::Failed | Status::Canceled => {
+                                assert!(deleted_documents == Some(0));
+                            }
+                        }
+                    }
                    Details::ClearAll { deleted_documents } => {
                        assert!(matches!(
                            kind.as_kind(),
--- a/index-stats/Cargo.toml
+++ b/index-stats/Cargo.toml
@ -0,0 +1,12 @@
+[package]
+name = "index-stats"
+description = "A small program that computes internal stats of a Meilisearch index"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+[dependencies]
+anyhow = "1.0.71"
+clap = { version = "4.3.5", features = ["derive"] }
+milli = { path = "../milli" }
+piechart = "1.0.0"
--- a/index-stats/src/main.rs
+++ b/index-stats/src/main.rs
@ -0,0 +1,224 @@
+use std::cmp::Reverse;
+use std::path::PathBuf;
+
+use clap::Parser;
+use milli::heed::{types::ByteSlice, EnvOpenOptions, PolyDatabase, RoTxn};
+use milli::index::db_name::*;
+use milli::index::Index;
+use piechart::{Chart, Color, Data};
+
+/// Command-line arguments of the tool that computes internal stats of a Meilisearch index.
+#[derive(Parser, Debug)]
+#[command(author, version, about, long_about = None)]
+struct Args {
+    /// The path to the LMDB Meilisearch index database.
+    path: PathBuf,
+
+    /// The radius of the graphs
+    #[clap(long, default_value_t = 10)]
+    graph_radius: u16,
+
+    /// The aspect ratio of the graphs
+    #[clap(long, default_value_t = 6)]
+    graph_aspect_ratio: u16,
+}
+
+/// Opens the index environment at the given path, computes the statistics of
+/// every internal database and draws one pie chart, followed by its legend,
+/// per metric: number of entries, size of entries, size of data, size of keys.
+fn main() -> anyhow::Result<()> {
+    let Args { path, graph_radius, graph_aspect_ratio } = Args::parse();
+    let env = EnvOpenOptions::new().max_dbs(24).open(path)?;
+
+    // TODO not sure to keep that...
+    //      if removed put the pub(crate) back in the Index struct
+    // The pattern names every field without a `..` rest pattern, so the build
+    // breaks as soon as a field is added to `Index` without being listed here.
+    matches!(
+        Option::<Index>::None,
+        Some(Index {
+            env: _,
+            main: _,
+            word_docids: _,
+            exact_word_docids: _,
+            word_prefix_docids: _,
+            exact_word_prefix_docids: _,
+            word_pair_proximity_docids: _,
+            word_prefix_pair_proximity_docids: _,
+            prefix_word_pair_proximity_docids: _,
+            word_position_docids: _,
+            word_fid_docids: _,
+            field_id_word_count_docids: _,
+            word_prefix_position_docids: _,
+            word_prefix_fid_docids: _,
+            script_language_docids: _,
+            facet_id_exists_docids: _,
+            facet_id_is_null_docids: _,
+            facet_id_is_empty_docids: _,
+            facet_id_f64_docids: _,
+            facet_id_string_docids: _,
+            field_id_docid_facet_f64s: _,
+            field_id_docid_facet_strings: _,
+            documents: _,
+        })
+    );
+
+    // Names of the databases to inspect, in the order in which they are opened.
+    // Keeping a single list avoids repeating every name once per usage.
+    let db_names = [
+        MAIN,
+        WORD_DOCIDS,
+        EXACT_WORD_DOCIDS,
+        WORD_PREFIX_DOCIDS,
+        EXACT_WORD_PREFIX_DOCIDS,
+        WORD_PAIR_PROXIMITY_DOCIDS,
+        SCRIPT_LANGUAGE_DOCIDS,
+        WORD_PREFIX_PAIR_PROXIMITY_DOCIDS,
+        PREFIX_WORD_PAIR_PROXIMITY_DOCIDS,
+        WORD_POSITION_DOCIDS,
+        WORD_FIELD_ID_DOCIDS,
+        FIELD_ID_WORD_COUNT_DOCIDS,
+        WORD_PREFIX_POSITION_DOCIDS,
+        WORD_PREFIX_FIELD_ID_DOCIDS,
+        FACET_ID_F64_DOCIDS,
+        FACET_ID_STRING_DOCIDS,
+        FACET_ID_EXISTS_DOCIDS,
+        FACET_ID_IS_NULL_DOCIDS,
+        FACET_ID_IS_EMPTY_DOCIDS,
+        FIELD_ID_DOCID_FACET_F64S,
+        FIELD_ID_DOCID_FACET_STRINGS,
+        DOCUMENTS,
+    ];
+
+    // NOTE(review): the database handles are obtained through a write
+    // transaction, so this tool is not strictly read-only on the index.
+    // Confirm that this is acceptable for the environments it is run against.
+    let mut wtxn = env.write_txn()?;
+    let mut list = Vec::with_capacity(db_names.len());
+    for name in db_names {
+        let db = env.create_poly_database(&mut wtxn, Some(name))?;
+        list.push((db, name));
+    }
+    wtxn.commit()?;
+
+    let rtxn = env.read_txn()?;
+    let result: Result<Vec<_>, _> =
+        list.into_iter().map(|(db, name)| compute_stats(&rtxn, db).map(|s| (s, name))).collect();
+    let mut stats = result?;
+
+    // One chart per metric, each one titled after the metric it displays.
+    let charts: [(&str, fn(&Stats) -> u64); 4] = [
+        ("Number of Entries", |s: &Stats| s.number_of_entries),
+        ("Size of Entries", |s: &Stats| s.size_of_entries),
+        ("Size of Data", |s: &Stats| s.size_of_data),
+        ("Size of Keys", |s: &Stats| s.size_of_keys),
+    ];
+    for (i, (title, metric)) in charts.into_iter().enumerate() {
+        // Charts are separated by an empty line; nothing follows the last one.
+        if i > 0 {
+            print!("\r\n");
+        }
+        display_chart(title, &mut stats, graph_radius, graph_aspect_ratio, metric);
+    }
+
+    Ok(())
+}
+
+/// Prints `title` indented by twice the graph radius, sorts `stats` by
+/// decreasing `metric`, then draws the pie chart of that metric and its legend.
+fn display_chart(
+    title: &str,
+    stats: &mut [(Stats, &str)],
+    graph_radius: u16,
+    graph_aspect_ratio: u16,
+    metric: fn(&Stats) -> u64,
+) {
+    println!("{:1$} {2}", "", graph_radius as usize * 2, title);
+    stats.sort_by_key(|(s, _)| Reverse(metric(s)));
+    let data = compute_graph_data(stats.iter().map(|(s, n)| (metric(s) as f32, *n)));
+    Chart::new().radius(graph_radius).aspect_ratio(graph_aspect_ratio).draw(&data);
+    display_legend(&data);
+}
+
+/// Prints one legend line per chart entry: its fill character (painted with
+/// the entry color when there is one), its label and its share of the total
+/// value as a percentage with two decimals.
+fn display_legend(data: &[Data]) {
+    let total: f32 = data.iter().map(|d| d.value).sum();
+    for Data { label, value, color, fill } in data {
+        // An entry without a color is shown with its bare fill character
+        // rather than aborting the whole program.
+        let marker = match color {
+            Some(style) => style.paint(fill.to_string()).to_string(),
+            None => fill.to_string(),
+        };
+        // When every value is zero the division would yield `NaN%`.
+        let percentage = if total > 0.0 { value / total * 100.0 } else { 0.0 };
+        println!("{} {} {:.02}%", marker, label, percentage);
+    }
+}
+
+/// Converts `(value, name)` pairs into chart entries, assigning to each one a
+/// color and a fill character taken from two fixed palettes that start over
+/// once exhausted.
+fn compute_graph_data<'a>(stats: impl IntoIterator<Item = (f32, &'a str)>) -> Vec<Data> {
+    let palette = [
+        Color::Red,
+        Color::Green,
+        Color::Yellow,
+        Color::Blue,
+        Color::Purple,
+        Color::Cyan,
+        Color::White,
+    ];
+    let glyphs = ['▴', '▵', '▾', '▿', '▪', '▫', '•', '◦'];
+
+    // Both palettes cycle independently, so the n-th entry gets the n-th color
+    // and the n-th glyph, each modulo the length of its own palette.
+    let styles = palette.into_iter().cycle().zip(glyphs.into_iter().cycle());
+
+    stats
+        .into_iter()
+        .zip(styles)
+        .map(|((value, name), (color, fill))| Data {
+            label: name.into(),
+            value,
+            color: Some(color.into()),
+            fill,
+        })
+        .collect()
+}
+
+/// Statistics of a single database, as computed by `compute_stats`.
+#[derive(Debug)]
+pub struct Stats {
+    /// Number of entries found while iterating over the database.
+    pub number_of_entries: u64,
+    /// Sum of the lengths, in bytes, of all the keys.
+    pub size_of_keys: u64,
+    /// Sum of the lengths, in bytes, of all the values.
+    pub size_of_data: u64,
+    /// Sum of `size_of_keys` and `size_of_data`.
+    pub size_of_entries: u64,
+}
+
+/// Walks over every entry of `db`, reading keys and values as raw bytes, and
+/// returns the number of entries along with the cumulated byte lengths of the
+/// keys, of the values and of both together.
+///
+/// Fails when the iterator cannot be created or when reading an entry fails.
+fn compute_stats(rtxn: &RoTxn, db: PolyDatabase) -> anyhow::Result<Stats> {
+    let mut stats =
+        Stats { number_of_entries: 0, size_of_keys: 0, size_of_data: 0, size_of_entries: 0 };
+
+    for entry in db.iter::<_, ByteSlice, ByteSlice>(rtxn)? {
+        let (key, value) = entry?;
+        stats.number_of_entries += 1;
+        stats.size_of_keys += key.len() as u64;
+        stats.size_of_data += value.len() as u64;
+    }
+
+    // The size of an entry is the size of its key plus the size of its value.
+    stats.size_of_entries = stats.size_of_keys + stats.size_of_data;
+    Ok(stats)
+}
--- a/meilisearch-auth/src/lib.rs
+++ b/meilisearch-auth/src/lib.rs
@ -45,6 +45,11 @@ impl AuthController {
        self.store.size()
    }

+    /// Return the used size of the `AuthController` database in bytes.
+    ///
+    /// The value, or the error, is forwarded unchanged from the underlying store.
+    pub fn used_size(&self) -> Result<u64> {
+        self.store.used_size()
+    }
+
    pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
        match self.store.get_api_key(create_key.uid)? {
            Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
--- a/meilisearch-auth/src/store.rs
+++ b/meilisearch-auth/src/store.rs
@ -55,9 +55,11 @@ impl HeedAuthStore {
        let path = path.as_ref().join(AUTH_DB_PATH);
        create_dir_all(&path)?;
        let env = Arc::new(open_auth_store_env(path.as_ref())?);
-        let keys = env.create_database(Some(KEY_DB_NAME))?;
+        let mut wtxn = env.write_txn()?;
+        let keys = env.create_database(&mut wtxn, Some(KEY_DB_NAME))?;
        let action_keyid_index_expiration =
-            env.create_database(Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
+            env.create_database(&mut wtxn, Some(KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME))?;
+        wtxn.commit()?;
        Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
    }

@ -73,6 +75,11 @@ impl HeedAuthStore {
        Ok(self.env.real_disk_size()?)
    }

+    /// Return the number of bytes actually used in the database, as reported
+    /// by the environment's `non_free_pages_size`.
+    pub fn used_size(&self) -> Result<u64> {
+        Ok(self.env.non_free_pages_size()?)
+    }
+
    pub fn set_drop_on_close(&mut self, v: bool) {
        self.should_close_on_drop = v;
    }
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@ -22,7 +22,7 @@ file-store = { path = "../file-store" }
 flate2 = "1.0.25"
 fst = "0.4.7"
 memmap2 = "0.5.10"
-milli = { path = "../milli", default-features = false }
+milli = { path = "../milli" }
 roaring = { version = "0.10.1", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde-cs = "0.2.4"
@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" }

 [features]
 # all specialized tokenizations
-default = ["milli/default"]
+all-tokenizations = ["milli/all-tokenizations"]

 # chinese specialized tokenization
 chinese = ["milli/chinese"]
--- a/meilisearch-types/src/deserr/mod.rs
+++ b/meilisearch-types/src/deserr/mod.rs
@ -150,6 +150,7 @@ make_missing_field_convenience_builder!(MissingApiKeyActions, missing_api_key_ac
 make_missing_field_convenience_builder!(MissingApiKeyExpiresAt, missing_api_key_expires_at);
 make_missing_field_convenience_builder!(MissingApiKeyIndexes, missing_api_key_indexes);
 make_missing_field_convenience_builder!(MissingSwapIndexes, missing_swap_indexes);
+make_missing_field_convenience_builder!(MissingDocumentFilter, missing_document_filter);

 // Integrate a sub-error into a [`DeserrError`] by taking its error message but using
 // the default error code (C) from `Self`
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -214,6 +214,8 @@ InvalidApiKeyUid                      , InvalidRequest       , BAD_REQUEST ;
 InvalidContentType                    , InvalidRequest       , UNSUPPORTED_MEDIA_TYPE ;
 InvalidDocumentCsvDelimiter           , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentFields                 , InvalidRequest       , BAD_REQUEST ;
+MissingDocumentFilter                 , InvalidRequest       , BAD_REQUEST ;
+InvalidDocumentFilter                 , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentGeoField               , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentId                     , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentLimit                  , InvalidRequest       , BAD_REQUEST ;
@ -315,6 +317,7 @@ impl ErrorCode for milli::Error {
                    UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
                    UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
                    UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
+                    UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
                    UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
                    UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
                        Code::InvalidDocumentId
--- a/meilisearch-types/src/tasks.rs
+++ b/meilisearch-types/src/tasks.rs
@ -49,6 +49,7 @@ impl Task {
            | IndexSwap { .. } => None,
            DocumentAdditionOrUpdate { index_uid, .. }
            | DocumentDeletion { index_uid, .. }
+            | DocumentDeletionByFilter { index_uid, .. }
            | DocumentClear { index_uid }
            | SettingsUpdate { index_uid, .. }
            | IndexCreation { index_uid, .. }
@ -67,6 +68,7 @@ impl Task {
        match self.kind {
            KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => Some(content_file),
            KindWithContent::DocumentDeletion { .. }
+            | KindWithContent::DocumentDeletionByFilter { .. }
            | KindWithContent::DocumentClear { .. }
            | KindWithContent::SettingsUpdate { .. }
            | KindWithContent::IndexDeletion { .. }
@ -96,6 +98,10 @@ pub enum KindWithContent {
        index_uid: String,
        documents_ids: Vec<String>,
    },
+    DocumentDeletionByFilter {
+        index_uid: String,
+        filter_expr: serde_json::Value,
+    },
    DocumentClear {
        index_uid: String,
    },
@ -145,6 +151,7 @@ impl KindWithContent {
        match self {
            KindWithContent::DocumentAdditionOrUpdate { .. } => Kind::DocumentAdditionOrUpdate,
            KindWithContent::DocumentDeletion { .. } => Kind::DocumentDeletion,
+            KindWithContent::DocumentDeletionByFilter { .. } => Kind::DocumentDeletion,
            KindWithContent::DocumentClear { .. } => Kind::DocumentDeletion,
            KindWithContent::SettingsUpdate { .. } => Kind::SettingsUpdate,
            KindWithContent::IndexCreation { .. } => Kind::IndexCreation,
@ -168,6 +175,7 @@ impl KindWithContent {
            | TaskDeletion { .. } => vec![],
            DocumentAdditionOrUpdate { index_uid, .. }
            | DocumentDeletion { index_uid, .. }
+            | DocumentDeletionByFilter { index_uid, .. }
            | DocumentClear { index_uid }
            | SettingsUpdate { index_uid, .. }
            | IndexCreation { index_uid, .. }
@ -200,6 +208,12 @@ impl KindWithContent {
                    deleted_documents: None,
                })
            }
+            KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
+                Some(Details::DocumentDeletionByFilter {
+                    original_filter: filter_expr.to_string(),
+                    deleted_documents: None,
+                })
+            }
            KindWithContent::DocumentClear { .. } | KindWithContent::IndexDeletion { .. } => {
                Some(Details::ClearAll { deleted_documents: None })
            }
@ -242,6 +256,12 @@ impl KindWithContent {
                    deleted_documents: Some(0),
                })
            }
+            KindWithContent::DocumentDeletionByFilter { index_uid: _, filter_expr } => {
+                Some(Details::DocumentDeletionByFilter {
+                    original_filter: filter_expr.to_string(),
+                    deleted_documents: Some(0),
+                })
+            }
            KindWithContent::DocumentClear { .. } => {
                Some(Details::ClearAll { deleted_documents: None })
            }
@ -282,6 +302,7 @@ impl From<&KindWithContent> for Option<Details> {
                })
            }
            KindWithContent::DocumentDeletion { .. } => None,
+            KindWithContent::DocumentDeletionByFilter { .. } => None,
            KindWithContent::DocumentClear { .. } => None,
            KindWithContent::SettingsUpdate { new_settings, .. } => {
                Some(Details::SettingsUpdate { settings: new_settings.clone() })
@ -478,6 +499,7 @@ pub enum Details {
    SettingsUpdate { settings: Box<Settings<Unchecked>> },
    IndexInfo { primary_key: Option<String> },
    DocumentDeletion { provided_ids: usize, deleted_documents: Option<u64> },
+    DocumentDeletionByFilter { original_filter: String, deleted_documents: Option<u64> },
    ClearAll { deleted_documents: Option<u64> },
    TaskCancelation { matched_tasks: u64, canceled_tasks: Option<u64>, original_filter: String },
    TaskDeletion { matched_tasks: u64, deleted_tasks: Option<u64>, original_filter: String },
@ -493,6 +515,9 @@ impl Details {
                *indexed_documents = Some(0)
            }
            Self::DocumentDeletion { deleted_documents, .. } => *deleted_documents = Some(0),
+            Self::DocumentDeletionByFilter { deleted_documents, .. } => {
+                *deleted_documents = Some(0)
+            }
            Self::ClearAll { deleted_documents } => *deleted_documents = Some(0),
            Self::TaskCancelation { canceled_tasks, .. } => *canceled_tasks = Some(0),
            Self::TaskDeletion { deleted_tasks, .. } => *deleted_tasks = Some(0),
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@ -106,7 +106,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
 zip = { version = "0.6.4", optional = true }

 [features]
-default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
+default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
 analytics = ["segment"]
 mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
 chinese = ["meilisearch-types/chinese"]
--- a/meilisearch/src/analytics/mock_analytics.rs
+++ b/meilisearch/src/analytics/mock_analytics.rs
@ -5,7 +5,7 @@ use actix_web::HttpRequest;
 use meilisearch_types::InstanceUid;
 use serde_json::Value;

-use super::{find_user_id, Analytics, DocumentDeletionKind};
+use super::{find_user_id, Analytics, DocumentDeletionKind, DocumentFetchKind};
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
 use crate::routes::tasks::TasksFilterQuery;
 use crate::Opt;
@ -71,6 +71,8 @@ impl Analytics for MockAnalytics {
        _request: &HttpRequest,
    ) {
    }
+    fn get_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
+    fn post_fetch_documents(&self, _documents_query: &DocumentFetchKind, _request: &HttpRequest) {}
    fn get_tasks(&self, _query: &TasksFilterQuery, _request: &HttpRequest) {}
    fn health_seen(&self, _request: &HttpRequest) {}
 }
--- a/meilisearch/src/analytics/mod.rs
+++ b/meilisearch/src/analytics/mod.rs
@ -64,6 +64,13 @@ pub enum DocumentDeletionKind {
    PerDocumentId,
    ClearAll,
    PerBatch,
+    PerFilter,
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum DocumentFetchKind {
+    PerDocumentId,
+    Normal { with_filter: bool, limit: usize, offset: usize },
 }

 pub trait Analytics: Sync + Send {
@ -89,6 +96,12 @@ pub trait Analytics: Sync + Send {
        request: &HttpRequest,
    );

+    // this method should be called to aggregate a fetch documents request made with GET
+    fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
+
+    // this method should be called to aggregate a fetch documents request made with POST
+    fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest);
+
    // this method should be called to aggregate a add documents request
    fn delete_documents(&self, kind: DocumentDeletionKind, request: &HttpRequest);

--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@ -23,7 +23,9 @@ use tokio::select;
 use tokio::sync::mpsc::{self, Receiver, Sender};
 use uuid::Uuid;

-use super::{config_user_id_path, DocumentDeletionKind, MEILISEARCH_CONFIG_PATH};
+use super::{
+    config_user_id_path, DocumentDeletionKind, DocumentFetchKind, MEILISEARCH_CONFIG_PATH,
+};
 use crate::analytics::Analytics;
 use crate::option::{default_http_addr, IndexerOpts, MaxMemory, MaxThreads, ScheduleSnapshot};
 use crate::routes::indexes::documents::UpdateDocumentsQuery;
@ -72,6 +74,8 @@ pub enum AnalyticsMsg {
    AggregateAddDocuments(DocumentsAggregator),
    AggregateDeleteDocuments(DocumentsDeletionAggregator),
    AggregateUpdateDocuments(DocumentsAggregator),
+    AggregateGetFetchDocuments(DocumentsFetchAggregator),
+    AggregatePostFetchDocuments(DocumentsFetchAggregator),
    AggregateTasks(TasksAggregator),
    AggregateHealth(HealthAggregator),
 }
@ -139,6 +143,8 @@ impl SegmentAnalytics {
            add_documents_aggregator: DocumentsAggregator::default(),
            delete_documents_aggregator: DocumentsDeletionAggregator::default(),
            update_documents_aggregator: DocumentsAggregator::default(),
+            get_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
+            post_fetch_documents_aggregator: DocumentsFetchAggregator::default(),
            get_tasks_aggregator: TasksAggregator::default(),
            health_aggregator: HealthAggregator::default(),
        });
@ -205,6 +211,16 @@ impl super::Analytics for SegmentAnalytics {
        let _ = self.sender.try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
    }

+    fn get_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
+        let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
+        let _ = self.sender.try_send(AnalyticsMsg::AggregateGetFetchDocuments(aggregate));
+    }
+
+    fn post_fetch_documents(&self, documents_query: &DocumentFetchKind, request: &HttpRequest) {
+        let aggregate = DocumentsFetchAggregator::from_query(documents_query, request);
+        let _ = self.sender.try_send(AnalyticsMsg::AggregatePostFetchDocuments(aggregate));
+    }
+
    fn get_tasks(&self, query: &TasksFilterQuery, request: &HttpRequest) {
        let aggregate = TasksAggregator::from_query(query, request);
        let _ = self.sender.try_send(AnalyticsMsg::AggregateTasks(aggregate));
@ -225,6 +241,7 @@ impl super::Analytics for SegmentAnalytics {
 struct Infos {
    env: String,
    experimental_enable_metrics: bool,
+    experimental_reduce_indexing_memory_usage: bool,
    db_path: bool,
    import_dump: bool,
    dump_dir: bool,
@ -258,6 +275,7 @@ impl From<Opt> for Infos {
        let Opt {
            db_path,
            experimental_enable_metrics,
+            experimental_reduce_indexing_memory_usage,
            http_addr,
            master_key: _,
            env,
@ -300,6 +318,7 @@ impl From<Opt> for Infos {
        Self {
            env,
            experimental_enable_metrics,
+            experimental_reduce_indexing_memory_usage,
            db_path: db_path != PathBuf::from("./data.ms"),
            import_dump: import_dump.is_some(),
            dump_dir: dump_dir != PathBuf::from("dumps/"),
@ -338,6 +357,8 @@ pub struct Segment {
    add_documents_aggregator: DocumentsAggregator,
    delete_documents_aggregator: DocumentsDeletionAggregator,
    update_documents_aggregator: DocumentsAggregator,
+    get_fetch_documents_aggregator: DocumentsFetchAggregator,
+    post_fetch_documents_aggregator: DocumentsFetchAggregator,
    get_tasks_aggregator: TasksAggregator,
    health_aggregator: HealthAggregator,
 }
@ -400,6 +421,8 @@ impl Segment {
                        Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
+                        Some(AnalyticsMsg::AggregateGetFetchDocuments(agreg)) => self.get_fetch_documents_aggregator.aggregate(agreg),
+                        Some(AnalyticsMsg::AggregatePostFetchDocuments(agreg)) => self.post_fetch_documents_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateTasks(agreg)) => self.get_tasks_aggregator.aggregate(agreg),
                        Some(AnalyticsMsg::AggregateHealth(agreg)) => self.health_aggregator.aggregate(agreg),
                        None => (),
@ -450,6 +473,10 @@ impl Segment {
            .into_event(&self.user, "Documents Deleted");
        let update_documents = std::mem::take(&mut self.update_documents_aggregator)
            .into_event(&self.user, "Documents Updated");
+        let get_fetch_documents = std::mem::take(&mut self.get_fetch_documents_aggregator)
+            .into_event(&self.user, "Documents Fetched GET");
+        let post_fetch_documents = std::mem::take(&mut self.post_fetch_documents_aggregator)
+            .into_event(&self.user, "Documents Fetched POST");
        let get_tasks =
            std::mem::take(&mut self.get_tasks_aggregator).into_event(&self.user, "Tasks Seen");
        let health =
@ -473,6 +500,12 @@ impl Segment {
        if let Some(update_documents) = update_documents {
            let _ = self.batcher.push(update_documents).await;
        }
+        if let Some(get_fetch_documents) = get_fetch_documents {
+            let _ = self.batcher.push(get_fetch_documents).await;
+        }
+        if let Some(post_fetch_documents) = post_fetch_documents {
+            let _ = self.batcher.push(post_fetch_documents).await;
+        }
        if let Some(get_tasks) = get_tasks {
            let _ = self.batcher.push(get_tasks).await;
        }
@ -949,6 +982,7 @@ pub struct DocumentsDeletionAggregator {
    per_document_id: bool,
    clear_all: bool,
    per_batch: bool,
+    per_filter: bool,
 }

 impl DocumentsDeletionAggregator {
@ -962,6 +996,7 @@ impl DocumentsDeletionAggregator {
            DocumentDeletionKind::PerDocumentId => ret.per_document_id = true,
            DocumentDeletionKind::ClearAll => ret.clear_all = true,
            DocumentDeletionKind::PerBatch => ret.per_batch = true,
+            DocumentDeletionKind::PerFilter => ret.per_filter = true,
        }

        ret
@ -981,6 +1016,7 @@ impl DocumentsDeletionAggregator {
        self.per_document_id |= other.per_document_id;
        self.clear_all |= other.clear_all;
        self.per_batch |= other.per_batch;
+        self.per_filter |= other.per_filter;
    }

    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -1132,3 +1168,76 @@ impl HealthAggregator {
        })
    }
 }
+
+#[derive(Default, Serialize)]
+pub struct DocumentsFetchAggregator {
+    #[serde(skip)]
+    timestamp: Option<OffsetDateTime>,
+
+    // context: every user agent seen on the aggregated requests
+    #[serde(rename = "user-agent")]
+    user_agents: HashSet<String>,
+
+    #[serde(rename = "requests.total_received")]
+    total_received: usize,
+
+    // true if at least one call was made on ../documents/:doc_id
+    per_document_id: bool,
+    // true if at least one call used a filter
+    per_filter: bool,
+
+    // pagination: largest limit and offset seen on the aggregated requests
+    #[serde(rename = "pagination.max_limit")]
+    max_limit: usize,
+    #[serde(rename = "pagination.max_offset")]
+    max_offset: usize,
+}
+
+impl DocumentsFetchAggregator {
+    pub fn from_query(query: &DocumentFetchKind, request: &HttpRequest) -> Self {
+        let (limit, offset) = match query {
+            DocumentFetchKind::PerDocumentId => (1, 0), // counts as limit 1, offset 0
+            DocumentFetchKind::Normal { limit, offset, .. } => (*limit, *offset),
+        };
+        Self {
+            timestamp: Some(OffsetDateTime::now_utc()),
+            user_agents: extract_user_agents(request).into_iter().collect(),
+            total_received: 1, // an aggregator built from a query stands for one request
+            per_document_id: matches!(query, DocumentFetchKind::PerDocumentId),
+            per_filter: matches!(query, DocumentFetchKind::Normal { with_filter, .. } if *with_filter),
+            max_limit: limit,
+            max_offset: offset,
+        }
+    }
+
+    /// Merge `other` into `self`: flags are OR-ed, counts added, pagination maxima kept.
+    pub fn aggregate(&mut self, other: Self) {
+        if self.timestamp.is_none() { // keep our own timestamp if we already have one
+            self.timestamp = other.timestamp;
+        }
+        for user_agent in other.user_agents {
+            self.user_agents.insert(user_agent);
+        }
+
+        self.total_received = self.total_received.saturating_add(other.total_received);
+        self.per_document_id |= other.per_document_id;
+        self.per_filter |= other.per_filter;
+
+        self.max_limit = self.max_limit.max(other.max_limit);
+        self.max_offset = self.max_offset.max(other.max_offset);
+    }
+
+    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
+        // if we had no timestamp it means we never encountered any events and
+        // thus we don't need to send this event.
+        let timestamp = self.timestamp?;
+
+        Some(Track {
+            timestamp: Some(timestamp),
+            user: user.clone(),
+            event: event_name.to_string(),
+            properties: serde_json::to_value(self).ok()?, // no event if serialization fails
+            ..Default::default()
+        })
+    }
+}
--- a/meilisearch/src/error.rs
+++ b/meilisearch/src/error.rs
@ -1,5 +1,6 @@
 use actix_web as aweb;
 use aweb::error::{JsonPayloadError, QueryPayloadError};
+use byte_unit::Byte;
 use meilisearch_types::document_formats::{DocumentFormatError, PayloadType};
 use meilisearch_types::error::{Code, ErrorCode, ResponseError};
 use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError};
@ -20,12 +21,14 @@ pub enum MeilisearchHttpError {
    InvalidContentType(String, Vec<String>),
    #[error("Document `{0}` not found.")]
    DocumentNotFound(String),
+    #[error("Sending an empty filter is forbidden.")]
+    EmptyFilter,
    #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
    InvalidExpression(&'static [&'static str], Value),
    #[error("A {0} payload is missing.")]
    MissingPayload(PayloadType),
-    #[error("The provided payload reached the size limit.")]
-    PayloadTooLarge,
+    #[error("The provided payload reached the size limit. The maximum accepted payload size is {}.",  Byte::from_bytes(*.0 as u64).get_appropriate_unit(true))]
+    PayloadTooLarge(usize),
    #[error("Two indexes must be given for each swap. The list `[{}]` contains {} indexes.",
        .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
    )]
@ -58,8 +61,9 @@ impl ErrorCode for MeilisearchHttpError {
            MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
            MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
            MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,
+            MeilisearchHttpError::EmptyFilter => Code::InvalidDocumentFilter,
            MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
-            MeilisearchHttpError::PayloadTooLarge => Code::PayloadTooLarge,
+            MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
            MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
            MeilisearchHttpError::IndexUid(e) => e.error_code(),
            MeilisearchHttpError::SerdeJson(_) => Code::Internal,
--- a/meilisearch/src/extractors/payload.rs
+++ b/meilisearch/src/extractors/payload.rs
@ -11,6 +11,7 @@ use crate::error::MeilisearchHttpError;
 pub struct Payload {
    payload: Decompress<dev::Payload>,
    limit: usize,
+    remaining: usize,
 }

 pub struct PayloadConfig {
@ -43,6 +44,7 @@ impl FromRequest for Payload {
        ready(Ok(Payload {
            payload: Decompress::from_headers(payload.take(), req.headers()),
            limit,
+            remaining: limit,
        }))
    }
 }
@ -54,12 +56,14 @@ impl Stream for Payload {
    fn poll_next(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Option<Self::Item>> {
        match Pin::new(&mut self.payload).poll_next(cx) {
            Poll::Ready(Some(result)) => match result {
-                Ok(bytes) => match self.limit.checked_sub(bytes.len()) {
+                Ok(bytes) => match self.remaining.checked_sub(bytes.len()) {
                    Some(new_limit) => {
-                        self.limit = new_limit;
+                        self.remaining = new_limit;
                        Poll::Ready(Some(Ok(bytes)))
                    }
-                    None => Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge))),
+                    None => {
+                        Poll::Ready(Some(Err(MeilisearchHttpError::PayloadTooLarge(self.limit))))
+                    }
                },
                x => Poll::Ready(Some(x.map_err(MeilisearchHttpError::from))),
            },
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@ -232,6 +232,7 @@ fn open_or_create_database_unchecked(
            dumps_path: opt.dump_dir.clone(),
            task_db_size: opt.max_task_db_size.get_bytes() as usize,
            index_base_map_size: opt.max_index_size.get_bytes() as usize,
+            enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
            indexer_config: (&opt.indexer_options).try_into()?,
            autobatching_enabled: true,
            max_number_of_tasks: 1_000_000,
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@ -29,6 +29,11 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

+    anyhow::ensure!(
+        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
+        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
+    );
+
    setup(&opt)?;

    match (opt.env.as_ref(), &opt.master_key) {
@ -181,9 +186,9 @@ Anonymous telemetry:\t\"Enabled\""
    }

    eprintln!();
-    eprintln!("Documentation:\t\thttps://docs.meilisearch.com");
+    eprintln!("Documentation:\t\thttps://www.meilisearch.com/docs");
    eprintln!("Source code:\t\thttps://github.com/meilisearch/meilisearch");
-    eprintln!("Contact:\t\thttps://docs.meilisearch.com/resources/contact.html");
+    eprintln!("Discord:\t\thttps://discord.meilisearch.com");
    eprintln!();
 }

--- a/meilisearch/src/metrics.rs
+++ b/meilisearch/src/metrics.rs
@ -4,20 +4,32 @@ use prometheus::{
    register_int_gauge_vec, HistogramVec, IntCounterVec, IntGauge, IntGaugeVec,
 };

-const HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: &[f64; 14] = &[
-    0.0005, 0.0008, 0.00085, 0.0009, 0.00095, 0.001, 0.00105, 0.0011, 0.00115, 0.0012, 0.0015,
-    0.002, 0.003, 1.0,
-];
+/// Create the buckets in seconds: 1ms steps under 10ms, 10ms under 100ms, 100ms up to 1s
+fn create_buckets() -> [f64; 29] {
+    (0..10)
+        .chain((10..100).step_by(10))
+        .chain((100..=1000).step_by(100))
+        .map(|i| i as f64 / 1000.) // the integers are milliseconds, the buckets are seconds
+        .collect::<Vec<_>>()
+        .try_into()
+        .unwrap() // cannot fail: the chain yields 10 + 9 + 10 = 29 items
+}

 lazy_static! {
-    pub static ref HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
-        opts!("http_requests_total", "HTTP requests total"),
+    pub static ref HTTP_RESPONSE_TIME_CUSTOM_BUCKETS: [f64; 29] = create_buckets();
+    pub static ref MEILISEARCH_HTTP_REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!(
+        opts!("meilisearch_http_requests_total", "Meilisearch HTTP requests total"),
        &["method", "path"]
    )
    .expect("Can't create a metric");
    pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge =
-        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch Db Size In Bytes"))
+        register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes"))
            .expect("Can't create a metric");
+    pub static ref MEILISEARCH_USED_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!(
+        "meilisearch_used_db_size_bytes",
+        "Meilisearch Used DB Size In Bytes"
+    ))
+    .expect("Can't create a metric");
    pub static ref MEILISEARCH_INDEX_COUNT: IntGauge =
        register_int_gauge!(opts!("meilisearch_index_count", "Meilisearch Index Count"))
            .expect("Can't create a metric");
@ -26,11 +38,16 @@ lazy_static! {
        &["index"]
    )
    .expect("Can't create a metric");
-    pub static ref HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
+    pub static ref MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS: HistogramVec = register_histogram_vec!(
        "http_response_time_seconds",
        "HTTP response times",
        &["method", "path"],
        HTTP_RESPONSE_TIME_CUSTOM_BUCKETS.to_vec()
    )
    .expect("Can't create a metric");
+    pub static ref MEILISEARCH_NB_TASKS: IntGaugeVec = register_int_gauge_vec!(
+        opts!("meilisearch_nb_tasks", "Meilisearch Number of tasks"),
+        &["kind", "value"]
+    )
+    .expect("Can't create a metric");
 }
--- a/meilisearch/src/middleware.rs
+++ b/meilisearch/src/middleware.rs
@ -52,11 +52,11 @@ where
        if is_registered_resource {
            let request_method = req.method().to_string();
            histogram_timer = Some(
-                crate::metrics::HTTP_RESPONSE_TIME_SECONDS
+                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
                    .with_label_values(&[&request_method, request_path])
                    .start_timer(),
            );
-            crate::metrics::HTTP_REQUESTS_TOTAL
+            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                .with_label_values(&[&request_method, request_path])
                .inc();
        }
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@ -48,6 +48,8 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
 const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
 const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
+const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
+    "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";

 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
@ -293,6 +295,11 @@ pub struct Opt {
    #[serde(default)]
    pub experimental_enable_metrics: bool,

+    /// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
+    #[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
+    #[serde(default)]
+    pub experimental_reduce_indexing_memory_usage: bool,
+
    #[serde(flatten)]
    #[clap(flatten)]
    pub indexer_options: IndexerOpts,
@ -385,6 +392,7 @@ impl Opt {
            #[cfg(all(not(debug_assertions), feature = "analytics"))]
            no_analytics,
            experimental_enable_metrics: enable_metrics_route,
+            experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
        } = self;
        export_to_env_if_not_present(MEILI_DB_PATH, db_path);
        export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -426,6 +434,10 @@ impl Opt {
            MEILI_EXPERIMENTAL_ENABLE_METRICS,
            enable_metrics_route.to_string(),
        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
+            reduce_indexing_memory_usage.to_string(),
+        );
        indexer_options.export_to_env();
    }

--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@ -4,19 +4,20 @@ use actix_web::http::header::CONTENT_TYPE;
 use actix_web::web::Data;
 use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
 use bstr::ByteSlice;
-use deserr::actix_web::AwebQueryParameter;
+use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use deserr::Deserr;
 use futures::StreamExt;
 use index_scheduler::IndexScheduler;
 use log::debug;
 use meilisearch_types::deserr::query_params::Param;
-use meilisearch_types::deserr::DeserrQueryParamError;
+use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
+use meilisearch_types::milli::DocumentId;
 use meilisearch_types::star_or::OptionStarOrList;
 use meilisearch_types::tasks::KindWithContent;
 use meilisearch_types::{milli, Document, Index};
@ -28,7 +29,7 @@ use tempfile::tempfile;
 use tokio::fs::File;
 use tokio::io::{AsyncSeekExt, AsyncWriteExt, BufWriter};

-use crate::analytics::{Analytics, DocumentDeletionKind};
+use crate::analytics::{Analytics, DocumentDeletionKind, DocumentFetchKind};
 use crate::error::MeilisearchHttpError;
 use crate::error::PayloadError::ReceivePayload;
 use crate::extractors::authentication::policies::*;
@ -36,6 +37,7 @@ use crate::extractors::authentication::GuardedData;
 use crate::extractors::payload::Payload;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
+use crate::search::parse_filter;

 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
    vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@ -66,13 +68,17 @@ pub struct DocumentParam {
 pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
        web::resource("")
-            .route(web::get().to(SeqHandler(get_all_documents)))
+            .route(web::get().to(SeqHandler(get_documents)))
            .route(web::post().to(SeqHandler(replace_documents)))
            .route(web::put().to(SeqHandler(update_documents)))
            .route(web::delete().to(SeqHandler(clear_all_documents))),
    )
-    // this route needs to be before the /documents/{document_id} to match properly
-    .service(web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents))))
+    // these routes need to be before the /documents/{document_id} to match properly
+    .service(
+        web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
+    )
+    .service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
+    .service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
    .service(
        web::resource("/{document_id}")
            .route(web::get().to(SeqHandler(get_document)))
@ -91,10 +97,14 @@ pub async fn get_document(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
    document_param: web::Path<DocumentParam>,
    params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
    let DocumentParam { index_uid, document_id } = document_param.into_inner();
    let index_uid = IndexUid::try_from(index_uid)?;

+    analytics.get_fetch_documents(&DocumentFetchKind::PerDocumentId, &req);
+
    let GetDocument { fields } = params.into_inner();
    let attributes_to_retrieve = fields.merge_star_and_none();

@ -127,29 +137,103 @@ pub async fn delete_document(

 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
-pub struct BrowseQuery {
+pub struct BrowseQueryGet {
    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
    offset: Param<usize>,
    #[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
    limit: Param<usize>,
    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
    fields: OptionStarOrList<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
+    filter: Option<String>,
 }

-pub async fn get_all_documents(
+/// Query describing which documents of an index to return.
+///
+/// Deserialized from the JSON body handled by `documents_by_query_post`; `get_documents` builds
+/// the same structure from its query-string parameters before calling `documents_by_query`.
+#[derive(Debug, Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct BrowseQuery {
+    /// Pagination offset; optional in the payload (`default`).
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentOffset>)]
+    offset: usize,
+    /// Pagination limit; falls back to `PAGINATION_DEFAULT_LIMIT` when absent.
+    #[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError<InvalidDocumentLimit>)]
+    limit: usize,
+    /// Attributes to keep in each returned document; optional in the payload.
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
+    fields: Option<Vec<String>>,
+    /// Raw JSON filter; optional. It is kept unparsed here and only handed on to the retrieval code.
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
+    filter: Option<Value>,
+}
+
+/// Handler for the `POST /fetch` documents route registered in `configure`.
+///
+/// Reads a `BrowseQuery` from the JSON body, reports the request to analytics through
+/// `post_fetch_documents`, then delegates the actual lookup to `documents_by_query`.
+pub async fn documents_by_query_post(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
-    params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
+    body: AwebJson<BrowseQuery, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    debug!("called with body: {:?}", body);
+
+    let body = body.into_inner();
+
+    // Only the shape of the query (filter present or not, limit, offset) is sent to analytics.
+    analytics.post_fetch_documents(
+        &DocumentFetchKind::Normal {
+            with_filter: body.filter.is_some(),
+            limit: body.limit,
+            offset: body.offset,
+        },
+        &req,
+    );
+
+    documents_by_query(&index_scheduler, index_uid, body)
+}
+
+/// Handler for `GET` on the documents collection route registered in `configure`.
+///
+/// Converts the query-string parameters (`BrowseQueryGet`) into a `BrowseQuery`, reports the
+/// request to analytics through `get_fetch_documents`, then delegates to `documents_by_query`.
+pub async fn get_documents(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
+    index_uid: web::Path<String>,
+    params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    debug!("called with params: {:?}", params);
+
+    let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
+
+    // The filter arrives as text: if it parses as JSON that value is used, otherwise the raw
+    // text is wrapped in `Value::String`. No filter validation happens at this point.
+    let filter = match filter {
+        Some(f) => match serde_json::from_str(&f) {
+            Ok(v) => Some(v),
+            _ => Some(Value::String(f)),
+        },
+        None => None,
+    };
+
+    let query = BrowseQuery {
+        offset: offset.0,
+        limit: limit.0,
+        fields: fields.merge_star_and_none(),
+        filter,
+    };
+
+    // Only the shape of the query (filter present or not, limit, offset) is sent to analytics.
+    analytics.get_fetch_documents(
+        &DocumentFetchKind::Normal {
+            with_filter: query.filter.is_some(),
+            limit: query.limit,
+            offset: query.offset,
+        },
+        &req,
+    );
+
+    documents_by_query(&index_scheduler, index_uid, query)
+}
+
+fn documents_by_query(
+    index_scheduler: &IndexScheduler,
+    index_uid: web::Path<String>,
+    query: BrowseQuery,
 ) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    debug!("called with params: {:?}", params);
-    let BrowseQuery { limit, offset, fields } = params.into_inner();
-    let attributes_to_retrieve = fields.merge_star_and_none();
+    let BrowseQuery { offset, limit, fields, filter } = query;

    let index = index_scheduler.index(&index_uid)?;
-    let (total, documents) = retrieve_documents(&index, offset.0, limit.0, attributes_to_retrieve)?;
+    let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;

-    let ret = PaginationView::new(offset.0, limit.0, total as usize, documents);
+    let ret = PaginationView::new(offset, limit, total as usize, documents);

    debug!("returns: {:?}", ret);
    Ok(HttpResponse::Ok().json(ret))
@ -373,7 +457,7 @@ async fn document_addition(
    Ok(task.into())
 }

-pub async fn delete_documents(
+pub async fn delete_documents_batch(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
    body: web::Json<Vec<Value>>,
@ -399,6 +483,42 @@ pub async fn delete_documents(
    Ok(HttpResponse::Accepted().json(task))
 }

+/// JSON body accepted by `delete_documents_by_filter`.
+#[derive(Debug, Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct DocumentDeletionByFilter {
+    /// Raw JSON filter selecting the documents to delete. There is no `default`: a dedicated
+    /// `missing_document_filter` error is configured for when the field is absent.
+    #[deserr(error = DeserrJsonError<InvalidDocumentFilter>, missing_field_error = DeserrJsonError::missing_document_filter)]
+    filter: Value,
+}
+
+/// Handler for the `POST /delete` documents route registered in `configure`.
+///
+/// Checks that the provided filter parses and is not empty, then registers a
+/// `KindWithContent::DocumentDeletionByFilter` task with the index scheduler and answers
+/// `202 Accepted` with the summarized task.
+///
+/// # Errors
+///
+/// Any failure while checking the filter is reported with `Code::InvalidDocumentFilter`.
+/// An invalid index uid, a failed task registration or a failed blocking task is propagated
+/// through `?`.
+pub async fn delete_documents_by_filter(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
+    index_uid: web::Path<String>,
+    body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
+    req: HttpRequest,
+    analytics: web::Data<dyn Analytics>,
+) -> Result<HttpResponse, ResponseError> {
+    debug!("called with params: {:?}", body);
+    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
+    let index_uid = index_uid.into_inner();
+    let filter = body.into_inner().filter;
+
+    // Recorded before the filter is checked, so requests rejected below are reported as well.
+    analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
+
+    // we ensure the filter is well formed before enqueuing it
+    // (the closure is called immediately; it exists so that both `?` conversions produce a
+    // `ResponseError` that can be remapped just below; the parsed filter itself is discarded)
+    || -> Result<_, ResponseError> {
+        Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
+    }()
+    // and whatever was the error, the error code should always be an InvalidDocumentFilter
+    .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
+    let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
+
+    // Registration is run on the blocking thread pool; the first `?` handles the join error,
+    // the second the registration error.
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
+
+    debug!("returns: {:?}", task);
+    Ok(HttpResponse::Accepted().json(task))
+}
+
 pub async fn clear_all_documents(
    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
@ -416,14 +536,15 @@ pub async fn clear_all_documents(
    Ok(HttpResponse::Accepted().json(task))
 }

-fn all_documents<'a>(
-    index: &Index,
-    rtxn: &'a RoTxn,
+fn some_documents<'a, 't: 'a>(
+    index: &'a Index,
+    rtxn: &'t RoTxn,
+    doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
 ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
    let fields_ids_map = index.fields_ids_map(rtxn)?;
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

-    Ok(index.all_documents(rtxn)?.map(move |ret| {
+    Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
        ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
            Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
        })
@ -434,24 +555,50 @@ fn retrieve_documents<S: AsRef<str>>(
    index: &Index,
    offset: usize,
    limit: usize,
+    filter: Option<Value>,
    attributes_to_retrieve: Option<Vec<S>>,
 ) -> Result<(u64, Vec<Document>), ResponseError> {
    let rtxn = index.read_txn()?;
+    let filter = &filter;
+    let filter = if let Some(filter) = filter {
+        parse_filter(filter)
+            .map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
+    } else {
+        None
+    };

-    let mut documents = Vec::new();
-    for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
-        let document = match &attributes_to_retrieve {
-            Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
-                &document?,
-                attributes_to_retrieve.iter().map(|s| s.as_ref()),
-            ),
-            None => document?,
-        };
-        documents.push(document);
-    }
+    let candidates = if let Some(filter) = filter {
+        filter.evaluate(&rtxn, index).map_err(|err| match err {
+            milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
+                ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter)
+            }
+            e => e.into(),
+        })?
+    } else {
+        index.documents_ids(&rtxn)?
+    };

-    let number_of_documents = index.number_of_documents(&rtxn)?;
-    Ok((number_of_documents, documents))
+    let (it, number_of_documents) = {
+        let number_of_documents = candidates.len();
+        (
+            some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
+            number_of_documents,
+        )
+    };
+
+    let documents: Result<Vec<_>, ResponseError> = it
+        .map(|document| {
+            Ok(match &attributes_to_retrieve {
+                Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
+                    &document?,
+                    attributes_to_retrieve.iter().map(|s| s.as_ref()),
+                ),
+                None => document?,
+            })
+        })
+        .collect();
+
+    Ok((number_of_documents, documents?))
 }

 fn retrieve_document<S: AsRef<str>>(
--- a/meilisearch/src/routes/metrics.rs
+++ b/meilisearch/src/routes/metrics.rs
@ -17,7 +17,7 @@ pub fn configure(config: &mut web::ServiceConfig) {

 pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
-    auth_controller: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<AuthController>>,
+    auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
    let auth_filters = index_scheduler.filters();
    if !auth_filters.all_indexes_authorized() {
@ -28,10 +28,10 @@ pub async fn get_metrics(
        return Err(error);
    }

-    let response =
-        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), auth_filters)?;
+    let response = create_all_stats((*index_scheduler).clone(), auth_controller, auth_filters)?;

    crate::metrics::MEILISEARCH_DB_SIZE_BYTES.set(response.database_size as i64);
+    crate::metrics::MEILISEARCH_USED_DB_SIZE_BYTES.set(response.used_database_size as i64);
    crate::metrics::MEILISEARCH_INDEX_COUNT.set(response.indexes.len() as i64);

    for (index, value) in response.indexes.iter() {
@ -40,6 +40,14 @@ pub async fn get_metrics(
            .set(value.number_of_documents as i64);
    }

+    for (kind, value) in index_scheduler.get_stats()? {
+        for (value, count) in value {
+            crate::metrics::MEILISEARCH_NB_TASKS
+                .with_label_values(&[&kind, &value])
+                .set(count as i64);
+        }
+    }
+
    let encoder = TextEncoder::new();
    let mut buffer = vec![];
    encoder.encode(&prometheus::gather(), &mut buffer).expect("Failed to encode metrics");
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@ -231,6 +231,8 @@ pub async fn running() -> HttpResponse {
 #[serde(rename_all = "camelCase")]
 pub struct Stats {
    pub database_size: u64,
+    #[serde(skip)]
+    pub used_database_size: u64,
    #[serde(serialize_with = "time::serde::rfc3339::option::serialize")]
    pub last_update: Option<OffsetDateTime>,
    pub indexes: BTreeMap<String, indexes::IndexStats>,
@ -259,6 +261,7 @@ pub fn create_all_stats(
    let mut last_task: Option<OffsetDateTime> = None;
    let mut indexes = BTreeMap::new();
    let mut database_size = 0;
+    let mut used_database_size = 0;

    for index_uid in index_scheduler.index_names()? {
        // Accumulate the size of all indexes, even unauthorized ones, so
@ -266,6 +269,7 @@ pub fn create_all_stats(
        // See <https://github.com/meilisearch/meilisearch/pull/3541#discussion_r1126747643> for context.
        let stats = index_scheduler.index_stats(&index_uid)?;
        database_size += stats.inner_stats.database_size;
+        used_database_size += stats.inner_stats.used_database_size;

        if !filters.is_index_authorized(&index_uid) {
            continue;
@ -278,10 +282,14 @@ pub fn create_all_stats(
    }

    database_size += index_scheduler.size()?;
+    used_database_size += index_scheduler.used_size()?;
    database_size += auth_controller.size()?;
-    database_size += index_scheduler.compute_update_file_size()?;
+    used_database_size += auth_controller.used_size()?;
+    let update_file_size = index_scheduler.compute_update_file_size()?;
+    database_size += update_file_size;
+    used_database_size += update_file_size;

-    let stats = Stats { database_size, last_update: last_task, indexes };
+    let stats = Stats { database_size, used_database_size, last_update: last_task, indexes };
    Ok(stats)
 }

--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@ -99,7 +99,7 @@ pub struct DetailsView {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deleted_tasks: Option<Option<u64>>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub original_filter: Option<String>,
+    pub original_filter: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub dump_uid: Option<Option<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
@ -131,8 +131,17 @@ impl From<Details> for DetailsView {
            } => DetailsView {
                provided_ids: Some(received_document_ids),
                deleted_documents: Some(deleted_documents),
+                original_filter: Some(None),
                ..DetailsView::default()
            },
+            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
+                DetailsView {
+                    provided_ids: Some(0),
+                    original_filter: Some(Some(original_filter)),
+                    deleted_documents: Some(deleted_documents),
+                    ..DetailsView::default()
+                }
+            }
            Details::ClearAll { deleted_documents } => {
                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
            }
@ -140,7 +149,7 @@ impl From<Details> for DetailsView {
                DetailsView {
                    matched_tasks: Some(matched_tasks),
                    canceled_tasks: Some(canceled_tasks),
-                    original_filter: Some(original_filter),
+                    original_filter: Some(Some(original_filter)),
                    ..DetailsView::default()
                }
            }
@ -148,7 +157,7 @@ impl From<Details> for DetailsView {
                DetailsView {
                    matched_tasks: Some(matched_tasks),
                    deleted_tasks: Some(deleted_tasks),
-                    original_filter: Some(original_filter),
+                    original_filter: Some(Some(original_filter)),
                    ..DetailsView::default()
                }
            }
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@ -745,7 +745,7 @@ fn format_value<A: AsRef<[u8]>>(
    }
 }

-fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
+pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
    match facets {
        Value::String(expr) => {
            let condition = Filter::from_str(expr)?;
--- a/meilisearch/tests/auth/authorization.rs
+++ b/meilisearch/tests/auth/authorization.rs
@ -16,8 +16,11 @@ pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'
            ("GET",     "/indexes/products/search") =>                         hashset!{"search", "*"},
            ("POST",    "/indexes/products/documents") =>                      hashset!{"documents.add", "documents.*", "*"},
            ("GET",     "/indexes/products/documents") =>                      hashset!{"documents.get", "documents.*", "*"},
+            ("POST",    "/indexes/products/documents/fetch") =>                hashset!{"documents.get", "documents.*", "*"},
            ("GET",     "/indexes/products/documents/0") =>                    hashset!{"documents.get", "documents.*", "*"},
            ("DELETE",  "/indexes/products/documents/0") =>                    hashset!{"documents.delete", "documents.*", "*"},
+            ("POST",    "/indexes/products/documents/delete-batch") =>         hashset!{"documents.delete", "documents.*", "*"},
+            ("POST",    "/indexes/products/documents/delete") =>               hashset!{"documents.delete", "documents.*", "*"},
            ("GET",     "/tasks") =>                                           hashset!{"tasks.get", "tasks.*", "*"},
            ("DELETE",  "/tasks") =>                                           hashset!{"tasks.delete", "tasks.*", "*"},
            ("GET",     "/tasks?indexUid=products") =>                         hashset!{"tasks.get", "tasks.*", "*"},
--- a/meilisearch/tests/common/index.rs
+++ b/meilisearch/tests/common/index.rs
@ -198,6 +198,11 @@ impl Index<'_> {
        self.service.get(url).await
    }

+    /// Sends `payload` as a `POST` to `/indexes/{uid}/documents/fetch` and returns the response
+    /// body together with its status code.
+    // NOTE(review): this goes through `service.post`, whereas `delete_document_by_filter` uses
+    // `post_encoded` with `self.encoder` — confirm the difference is intentional.
+    pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
+        let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
+        self.service.post(url, payload).await
+    }
+
    pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
        self.service.get(url).await
@ -225,6 +230,11 @@ impl Index<'_> {
        self.service.delete(url).await
    }

+    /// Sends `body` as a `POST` to `/indexes/{uid}/documents/delete`, encoded with this index's
+    /// `encoder`, and returns the response body together with its status code.
+    pub async fn delete_document_by_filter(&self, body: Value) -> (Value, StatusCode) {
+        let url = format!("/indexes/{}/documents/delete", urlencode(self.uid.as_ref()));
+        self.service.post_encoded(url, body, self.encoder).await
+    }
+
    pub async fn clear_all_documents(&self) -> (Value, StatusCode) {
        let url = format!("/indexes/{}/documents", urlencode(self.uid.as_ref()));
        self.service.delete(url).await
--- a/meilisearch/tests/documents/add_documents.rs
+++ b/meilisearch/tests/documents/add_documents.rs
@ -1781,7 +1781,7 @@ async fn error_add_documents_payload_size() {
    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
        @r###"
    {
-      "message": "The provided payload reached the size limit.",
+      "message": "The provided payload reached the size limit. The maximum accepted payload size is 10.00 MiB.",
      "code": "payload_too_large",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#payload_too_large"
--- a/meilisearch/tests/documents/delete_documents.rs
+++ b/meilisearch/tests/documents/delete_documents.rs
@ -1,3 +1,4 @@
+use meili_snap::{json_string, snapshot};
 use serde_json::json;

 use crate::common::{GetAllDocumentsOptions, Server};
@ -135,3 +136,254 @@ async fn delete_no_document_batch() {
    assert_eq!(code, 200);
    assert_eq!(response["results"].as_array().unwrap().len(), 3);
 }
+
+/// Deletes documents with plain string filters and checks, after each deletion, both the task
+/// details and the documents left in the index.
+#[actix_rt::test]
+async fn delete_document_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    index.update_settings_filterable_attributes(json!(["color"])).await;
+    index
+        .add_documents(
+            json!([
+                { "id": 0, "color": "red" },
+                { "id": 1, "color": "blue" },
+                { "id": 2, "color": "blue" },
+                { "id": 3 },
+            ]),
+            Some("id"),
+        )
+        .await;
+    index.wait_task(1).await;
+    // First deletion: removes the two blue documents.
+    let (response, code) =
+        index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "taskUid": 2,
+      "indexUid": "doggo",
+      "status": "enqueued",
+      "type": "documentDeletion",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+
+    let response = index.wait_task(2).await;
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "uid": 2,
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 2,
+        "originalFilter": "\"color = blue\""
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        },
+        {
+          "id": 3
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 2
+    }
+    "###);
+
+    // Second deletion: removes the document that has no `color` field.
+    let (response, code) =
+        index.delete_document_by_filter(json!({ "filter": "color NOT EXISTS"})).await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "taskUid": 3,
+      "indexUid": "doggo",
+      "status": "enqueued",
+      "type": "documentDeletion",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+
+    let response = index.wait_task(3).await;
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "uid": 3,
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 1,
+        "originalFilter": "\"color NOT EXISTS\""
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 1
+    }
+    "###);
+}
+
+/// Deletes documents with array-shaped filters (a flat array, then a nested array) and checks,
+/// after each deletion, both the task details and the documents left in the index.
+#[actix_rt::test]
+async fn delete_document_by_complex_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    index.update_settings_filterable_attributes(json!(["color"])).await;
+    index
+        .add_documents(
+            json!([
+                { "id": 0, "color": "red" },
+                { "id": 1, "color": "blue" },
+                { "id": 2, "color": "blue" },
+                { "id": 3, "color": "green" },
+                { "id": 4 },
+            ]),
+            Some("id"),
+        )
+        .await;
+    index.wait_task(1).await;
+    // First deletion: per the snapshots below, only the two blue documents are removed.
+    let (response, code) = index
+        .delete_document_by_filter(
+            json!({ "filter": ["color != red", "color != green", "color EXISTS"] }),
+        )
+        .await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "taskUid": 2,
+      "indexUid": "doggo",
+      "status": "enqueued",
+      "type": "documentDeletion",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+
+    let response = index.wait_task(2).await;
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "uid": 2,
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 2,
+        "originalFilter": "[\"color != red\",\"color != green\",\"color EXISTS\"]"
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        },
+        {
+          "id": 3,
+          "color": "green"
+        },
+        {
+          "id": 4
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 3
+    }
+    "###);
+
+    // Second deletion: per the final listing, the green document and the one without `color`
+    // are removed.
+    let (response, code) = index
+        .delete_document_by_filter(json!({ "filter": [["color = green", "color NOT EXISTS"]] }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "taskUid": 3,
+      "indexUid": "doggo",
+      "status": "enqueued",
+      "type": "documentDeletion",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+
+    // NOTE(review): the snapshot below records `deletedDocuments: 4`, yet only three documents
+    // were left before this call and the final listing shows that two of them were removed —
+    // confirm whether this count is the intended behaviour or a counting bug being pinned.
+    let response = index.wait_task(3).await;
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "uid": 3,
+      "indexUid": "doggo",
+      "status": "succeeded",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 4,
+        "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 1
+    }
+    "###);
+}
--- a/meilisearch/tests/documents/errors.rs
+++ b/meilisearch/tests/documents/errors.rs
@ -82,6 +82,111 @@ async fn get_all_documents_bad_limit() {
    "###);
 }

+/// Exercises the `filter` query parameter of the get-all-documents route: first against an
+/// index that does not exist (always `index_not_found`), then against a freshly created index
+/// with no filterable attributes (empty filter accepted, malformed or non-filterable filters
+/// rejected with `invalid_document_filter`).
+#[actix_rt::test]
+async fn get_all_documents_bad_filter() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Since the filter can't be parsed automatically by deserr, we have the wrong error message
+    // if the index does not exist: we could expect to get an error message about the invalid filter before
+    // the existence of the index is checked, but it is not the case.
+    let (response, code) = index.get_all_documents_raw("?filter").await;
+    snapshot!(code, @"404 Not Found");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Index `test` not found.",
+      "code": "index_not_found",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_not_found"
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
+    snapshot!(code, @"404 Not Found");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Index `test` not found.",
+      "code": "index_not_found",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_not_found"
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
+    snapshot!(code, @"404 Not Found");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Index `test` not found.",
+      "code": "index_not_found",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_not_found"
+    }
+    "###);
+
+    // Create the index so that the same three requests now reach the filter handling.
+    let (response, code) = index.create(None).await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "taskUid": 0,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "indexCreation",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    let response = server.wait_task(0).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###"
+    {
+      "uid": 0,
+      "indexUid": "test",
+      "status": "succeeded",
+      "type": "indexCreation",
+      "canceledBy": null,
+      "details": {
+        "primaryKey": null
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    // A `filter` parameter without a value is accepted and returns the (empty) document list.
+    let (response, code) = index.get_all_documents_raw("?filter").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response), @r###"
+    {
+      "results": [],
+      "offset": 0,
+      "limit": 20,
+      "total": 0
+    }
+    "###);
+
+    // A filter that is not a complete expression is rejected while being parsed.
+    let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    // A well-formed filter on an attribute that is not filterable is rejected as well.
+    let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo=bernese",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+}
+
 #[actix_rt::test]
 async fn delete_documents_batch() {
    let server = Server::new().await;
@ -418,3 +523,264 @@ async fn update_documents_csv_delimiter_with_bad_content_type() {
    }
    "###);
 }
+
+#[actix_rt::test]
+async fn delete_document_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    // send a bad payload type
+    let (response, code) = index.delete_document_by_filter(json!("hello")).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type: expected an object, but found a string: `\"hello\"`",
+      "code": "bad_request",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#bad_request"
+    }
+    "###);
+
+    // send a filter with a bad type
+    let (response, code) = index.delete_document_by_filter(json!({ "filter": true })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    // send bad filter
+    let (response, code) = index.delete_document_by_filter(json!({ "filter": "hello"})).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    // send empty filter
+    let (response, code) = index.delete_document_by_filter(json!({ "filter": ""})).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Sending an empty filter is forbidden.",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    // do not send any filter
+    let (response, code) = index.delete_document_by_filter(json!({})).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Missing field `filter`",
+      "code": "missing_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#missing_document_filter"
+    }
+    "###);
+
+    // index does not exist
+    let (response, code) =
+        index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
+    snapshot!(code, @"202 Accepted");
+    let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
+    {
+      "uid": 0,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 0,
+        "originalFilter": "\"doggo = bernese\""
+      },
+      "error": {
+        "message": "Index `doggo` not found.",
+        "code": "index_not_found",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#index_not_found"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (response, code) = index.create(None).await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response["taskUid"].as_u64().unwrap()).await;
+
+    // no filterable attributes are set
+    let (response, code) =
+        index.delete_document_by_filter(json!({ "filter": "doggo = bernese"})).await;
+    snapshot!(code, @"202 Accepted");
+    let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
+    {
+      "uid": 2,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 0,
+        "originalFilter": "\"doggo = bernese\""
+      },
+      "error": {
+        "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
+        "code": "invalid_document_filter",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (response, code) = index.update_settings_filterable_attributes(json!(["doggo"])).await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response["taskUid"].as_u64().unwrap()).await;
+
+    // attribute is not filterable while another attribute is filterable
+    let (response, code) =
+        index.delete_document_by_filter(json!({ "filter": "catto = jorts"})).await;
+    snapshot!(code, @"202 Accepted");
+    let response = server.wait_task(response["taskUid"].as_u64().unwrap()).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]"}), @r###"
+    {
+      "uid": 4,
+      "indexUid": "doggo",
+      "status": "failed",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 0,
+        "originalFilter": "\"catto = jorts\""
+      },
+      "error": {
+        "message": "Attribute `catto` is not filterable. Available filterable attributes are: `doggo`.\n1:6 catto = jorts",
+        "code": "invalid_document_filter",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn fetch_document_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    index.update_settings_filterable_attributes(json!(["color"])).await;
+    index
+        .add_documents(
+            json!([
+                { "id": 0, "color": "red" },
+                { "id": 1, "color": "blue" },
+                { "id": 2, "color": "blue" },
+                { "id": 3 },
+            ]),
+            Some("id"),
+        )
+        .await;
+    index.wait_task(1).await;
+
+    let (response, code) = index.get_document_by_filter(json!(null)).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type: expected an object, but found null",
+      "code": "bad_request",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#bad_request"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
+      "code": "invalid_document_offset",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_offset"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
+      "code": "invalid_document_limit",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_limit"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
+      "code": "invalid_document_fields",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_fields"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    let (response, code) =
+        index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+}
--- a/meilisearch/tests/documents/get_documents.rs
+++ b/meilisearch/tests/documents/get_documents.rs
@ -1,5 +1,6 @@
 use actix_web::test;
 use http::header::ACCEPT_ENCODING;
+use meili_snap::*;
 use serde_json::{json, Value};
 use urlencoding::encode as urlencode;

@ -378,3 +379,164 @@ async fn get_documents_displayed_attributes_is_ignored() {
    assert_eq!(response.as_object().unwrap().keys().count(), 16);
    assert!(response.as_object().unwrap().get("gender").is_some());
 }
+
+#[actix_rt::test]
+async fn get_document_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    index.update_settings_filterable_attributes(json!(["color"])).await;
+    index
+        .add_documents(
+            json!([
+                { "id": 0, "color": "red" },
+                { "id": 1, "color": "blue" },
+                { "id": 2, "color": "blue" },
+                { "id": 3 },
+            ]),
+            Some("id"),
+        )
+        .await;
+    index.wait_task(1).await;
+
+    let (response, code) = index.get_document_by_filter(json!({})).await;
+    let (response2, code2) = index.get_all_documents_raw("").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        },
+        {
+          "id": 1,
+          "color": "blue"
+        },
+        {
+          "id": 2,
+          "color": "blue"
+        },
+        {
+          "id": 3
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 4
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
+    let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 1,
+          "color": "blue"
+        },
+        {
+          "id": 2,
+          "color": "blue"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 2
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    let (response, code) = index
+        .get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
+        .await;
+    let (response2, code2) =
+        index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 3
+        }
+      ],
+      "offset": 1,
+      "limit": 1,
+      "total": 2
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    let (response, code) = index
+        .get_document_by_filter(
+            json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
+        )
+        .await;
+    let (response2, code2) =
+        index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "color": "red"
+        }
+      ],
+      "offset": 0,
+      "limit": 1,
+      "total": 2
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    // Now testing more complex filters that the GET route can't represent
+
+    let (response, code) =
+        index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        },
+        {
+          "id": 1,
+          "color": "blue"
+        },
+        {
+          "id": 2,
+          "color": "blue"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 3
+    }
+    "###);
+
+    let (response, code) = index
+        .get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 1
+    }
+    "###);
+}
--- a/meilisearch/tests/search/errors.rs
+++ b/meilisearch/tests/search/errors.rs
@ -946,7 +946,7 @@ async fn sort_unset_ranking_rule() {
    index.wait_task(1).await;

    let expected_response = json!({
-        "message": "The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.",
+        "message": "You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.",
        "code": "invalid_search_sort",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_sort"
--- a/meilisearch/tests/tasks/mod.rs
+++ b/meilisearch/tests/tasks/mod.rs
@ -413,7 +413,7 @@ async fn test_summarized_document_addition_or_update() {
 }

 #[actix_web::test]
-async fn test_summarized_delete_batch() {
+async fn test_summarized_delete_documents_by_batch() {
    let server = Server::new().await;
    let index = server.index("test");
    index.delete_batch(vec![1, 2, 3]).await;
@ -430,7 +430,8 @@ async fn test_summarized_delete_batch() {
      "canceledBy": null,
      "details": {
        "providedIds": 3,
-        "deletedDocuments": 0
+        "deletedDocuments": 0,
+        "originalFilter": null
      },
      "error": {
        "message": "Index `test` not found.",
@ -460,7 +461,8 @@ async fn test_summarized_delete_batch() {
      "canceledBy": null,
      "details": {
        "providedIds": 1,
-        "deletedDocuments": 0
+        "deletedDocuments": 0,
+        "originalFilter": null
      },
      "error": null,
      "duration": "[duration]",
@ -472,7 +474,100 @@ async fn test_summarized_delete_batch() {
 }

 #[actix_web::test]
-async fn test_summarized_delete_document() {
+async fn test_summarized_delete_documents_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
+    index.wait_task(0).await;
+    let (task, _) = index.get_task(0).await;
+    assert_json_snapshot!(task,
+        { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
+        @r###"
+    {
+      "uid": 0,
+      "indexUid": "test",
+      "status": "failed",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 0,
+        "originalFilter": "\"doggo = bernese\""
+      },
+      "error": {
+        "message": "Index `test` not found.",
+        "code": "index_not_found",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#index_not_found"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    index.create(None).await;
+    index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
+    index.wait_task(2).await;
+    let (task, _) = index.get_task(2).await;
+    assert_json_snapshot!(task,
+        { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
+        @r###"
+    {
+      "uid": 2,
+      "indexUid": "test",
+      "status": "failed",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 0,
+        "originalFilter": "\"doggo = bernese\""
+      },
+      "error": {
+        "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo = bernese",
+        "code": "invalid_document_filter",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    index.update_settings(json!({ "filterableAttributes": ["doggo"] })).await;
+    index.delete_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
+    index.wait_task(4).await;
+    let (task, _) = index.get_task(4).await;
+    assert_json_snapshot!(task,
+        { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
+        @r###"
+    {
+      "uid": 4,
+      "indexUid": "test",
+      "status": "succeeded",
+      "type": "documentDeletion",
+      "canceledBy": null,
+      "details": {
+        "providedIds": 0,
+        "deletedDocuments": 0,
+        "originalFilter": "\"doggo = bernese\""
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
+
+#[actix_web::test]
+async fn test_summarized_delete_document_by_id() {
    let server = Server::new().await;
    let index = server.index("test");
    index.delete_document(1).await;
@ -489,7 +584,8 @@ async fn test_summarized_delete_document() {
      "canceledBy": null,
      "details": {
        "providedIds": 1,
-        "deletedDocuments": 0
+        "deletedDocuments": 0,
+        "originalFilter": null
      },
      "error": {
        "message": "Index `test` not found.",
@ -519,7 +615,8 @@ async fn test_summarized_delete_document() {
      "canceledBy": null,
      "details": {
        "providedIds": 1,
-        "deletedDocuments": 0
+        "deletedDocuments": 0,
+        "originalFilter": null
      },
      "error": null,
      "duration": "[duration]",
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@ -25,8 +25,13 @@ flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
-grenad = { version = "0.4.4", default-features = false, features = ["tempfile"] }
-heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.5", default-features = false, features = ["lmdb", "sync-read-txn"] }
+grenad = { version = "0.4.4", default-features = false, features = [
+    "tempfile",
+] }
+heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.6", default-features = false, features = [
+    "lmdb",
+    "sync-read-txn",
+] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
 memmap2 = "0.5.10"
@ -39,12 +44,17 @@ rstar = { version = "0.10.0", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
 slice-group-by = "0.3.0"
-smallstr =  { version = "0.3.0", features = ["serde"] }
+smallstr = { version = "0.3.0", features = ["serde"] }
 smallvec = "1.10.0"
 smartstring = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
-time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+time = { version = "0.3.20", features = [
+    "serde-well-known",
+    "formatting",
+    "parsing",
+    "macros",
+] }
 uuid = { version = "1.3.1", features = ["v4"] }

 filter-parser = { path = "../filter-parser" }
@ -63,13 +73,10 @@ big_s = "1.0.2"
 insta = "1.29.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
-rand = {version = "0.8.5", features = ["small_rng"] }
-
-[target.'cfg(fuzzing)'.dev-dependencies]
-fuzzcheck = "0.12.1"
+rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-default = [ "charabia/default" ]
+all-tokenizations = ["charabia/default"]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@ -111,7 +111,6 @@ pub enum Error {
    Io(#[from] io::Error),
 }

-#[cfg(test)]
 pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
    let documents = match json {
        object @ serde_json::Value::Object(_) => vec![object],
@ -141,7 +140,6 @@ macro_rules! documents {
    }};
 }

-#[cfg(test)]
 pub fn documents_batch_reader_from_objects(
    objects: impl IntoIterator<Item = Object>,
 ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@ -112,6 +112,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidGeoField(#[from] GeoError),
    #[error("{0}")]
    InvalidFilter(String),
+    #[error("Invalid type for filter subexpression: `expected {}, found: {1}`.", .0.join(", "))]
+    InvalidFilterExpression(&'static [&'static str], Value),
    #[error("Attribute `{}` is not sortable. {}",
        .field,
        match .valid_fields.is_empty() {
@ -124,7 +126,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
    #[error("{}", HeedError::BadOpenOptions)]
    InvalidLmdbOpenOptions,
-    #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
+    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
    SortRankingRuleMissing,
    #[error("The database file is in an invalid state.")]
    InvalidStoreFile,
--- a/milli/src/external_documents_ids.rs
+++ b/milli/src/external_documents_ids.rs
@ -106,22 +106,30 @@ impl<'a> ExternalDocumentsIds<'a> {
        map
    }

+    /// Return an fst combining the hard and soft maps, without the entries marked as deleted.
+    pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
+        if self.soft.is_empty() {
+            return Ok(Cow::Borrowed(&self.hard));
+        }
+        let union_op = self.hard.op().add(&self.soft).r#union();
+
+        let mut iter = union_op.into_stream();
+        let mut new_hard_builder = fst::MapBuilder::memory();
+        while let Some((external_id, marked_docids)) = iter.next() {
+            let value = indexed_last_value(marked_docids).unwrap();
+            if value != DELETED_ID {
+                new_hard_builder.insert(external_id, value)?;
+            }
+        }
+
+        drop(iter);
+
+        Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
+    }
+
    fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
        if self.soft.len() >= self.hard.len() / 2 {
-            let union_op = self.hard.op().add(&self.soft).r#union();
-
-            let mut iter = union_op.into_stream();
-            let mut new_hard_builder = fst::MapBuilder::memory();
-            while let Some((external_id, marked_docids)) = iter.next() {
-                let value = indexed_last_value(marked_docids).unwrap();
-                if value != DELETED_ID {
-                    new_hard_builder.insert(external_id, value)?;
-                }
-            }
-
-            drop(iter);
-
-            self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
+            self.hard = self.to_fst()?.into_owned();
            self.soft = fst::Map::default().map_data(Cow::Owned)?;
        }

--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
@ -49,7 +49,7 @@ impl CboRoaringBitmapCodec {
        } else {
            // Otherwise, it means we used the classic RoaringBitmapCodec and
            // that the header takes threshold integers.
-            RoaringBitmap::deserialize_from(bytes)
+            RoaringBitmap::deserialize_unchecked_from(bytes)
        }
    }

@ -69,7 +69,7 @@ impl CboRoaringBitmapCodec {
                    vec.push(integer);
                }
            } else {
-                roaring |= RoaringBitmap::deserialize_from(bytes.as_ref())?;
+                roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
            }
        }

--- a/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
@ -8,7 +8,7 @@ impl heed::BytesDecode<'_> for RoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringBitmap::deserialize_from(bytes).ok()
+        RoaringBitmap::deserialize_unchecked_from(bytes).ok()
    }
 }

--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -21,10 +21,9 @@ use crate::heed_codec::facet::{
 };
 use crate::heed_codec::{ScriptLanguageCodec, StrBEU16Codec, StrRefCodec};
 use crate::{
-    default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
-    DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
-    FieldIdWordCountCodec, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
-    Search, U8StrStrCodec, BEU16, BEU32,
+    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
+    FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec,
+    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -94,10 +93,10 @@ pub mod db_name {
 #[derive(Clone)]
 pub struct Index {
    /// The LMDB environment which this index is associated with.
-    pub(crate) env: heed::Env,
+    pub env: heed::Env,

    /// Contains many different types (e.g. the fields ids map).
-    pub(crate) main: PolyDatabase,
+    pub main: PolyDatabase,

    /// A word and all the documents ids containing the word.
    pub word_docids: Database<Str, RoaringBitmapCodec>,
@ -111,9 +110,6 @@ pub struct Index {
    /// A prefix of word and all the documents ids containing this prefix, from attributes for which typos are not allowed.
    pub exact_word_prefix_docids: Database<Str, RoaringBitmapCodec>,

-    /// Maps a word and a document id (u32) to all the positions where the given word appears.
-    pub docid_word_positions: Database<BEU32StrCodec, BoRoaringBitmapCodec>,
-
    /// Maps the proximity between a pair of words with all the docids where this relation appears.
    pub word_pair_proximity_docids: Database<U8StrStrCodec, CboRoaringBitmapCodec>,
    /// Maps the proximity between a pair of word and prefix with all the docids where this relation appears.
@ -154,7 +150,7 @@ pub struct Index {
    pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,

    /// Maps the document id to the document as an obkv store.
-    pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
+    pub documents: Database<OwnedType<BEU32>, ObkvCodec>,
 }

 impl Index {
@ -170,33 +166,45 @@ impl Index {
        unsafe { options.flag(Flags::MdbAlwaysFreePages) };

        let env = options.open(path)?;
-        let main = env.create_poly_database(Some(MAIN))?;
-        let word_docids = env.create_database(Some(WORD_DOCIDS))?;
-        let exact_word_docids = env.create_database(Some(EXACT_WORD_DOCIDS))?;
-        let word_prefix_docids = env.create_database(Some(WORD_PREFIX_DOCIDS))?;
-        let exact_word_prefix_docids = env.create_database(Some(EXACT_WORD_PREFIX_DOCIDS))?;
-        let docid_word_positions = env.create_database(Some(DOCID_WORD_POSITIONS))?;
-        let word_pair_proximity_docids = env.create_database(Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
-        let script_language_docids = env.create_database(Some(SCRIPT_LANGUAGE_DOCIDS))?;
+        let mut wtxn = env.write_txn()?;
+        let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
+        let word_docids = env.create_database(&mut wtxn, Some(WORD_DOCIDS))?;
+        let exact_word_docids = env.create_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
+        let word_prefix_docids = env.create_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
+        let exact_word_prefix_docids =
+            env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
+        let word_pair_proximity_docids =
+            env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
+        let script_language_docids =
+            env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
        let word_prefix_pair_proximity_docids =
-            env.create_database(Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
+            env.create_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
        let prefix_word_pair_proximity_docids =
-            env.create_database(Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
-        let word_position_docids = env.create_database(Some(WORD_POSITION_DOCIDS))?;
-        let word_fid_docids = env.create_database(Some(WORD_FIELD_ID_DOCIDS))?;
-        let field_id_word_count_docids = env.create_database(Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
-        let word_prefix_position_docids = env.create_database(Some(WORD_PREFIX_POSITION_DOCIDS))?;
-        let word_prefix_fid_docids = env.create_database(Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
-        let facet_id_f64_docids = env.create_database(Some(FACET_ID_F64_DOCIDS))?;
-        let facet_id_string_docids = env.create_database(Some(FACET_ID_STRING_DOCIDS))?;
-        let facet_id_exists_docids = env.create_database(Some(FACET_ID_EXISTS_DOCIDS))?;
-        let facet_id_is_null_docids = env.create_database(Some(FACET_ID_IS_NULL_DOCIDS))?;
-        let facet_id_is_empty_docids = env.create_database(Some(FACET_ID_IS_EMPTY_DOCIDS))?;
+            env.create_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
+        let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
+        let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
+        let field_id_word_count_docids =
+            env.create_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
+        let word_prefix_position_docids =
+            env.create_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
+        let word_prefix_fid_docids =
+            env.create_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
+        let facet_id_f64_docids = env.create_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
+        let facet_id_string_docids =
+            env.create_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
+        let facet_id_exists_docids =
+            env.create_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
+        let facet_id_is_null_docids =
+            env.create_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
+        let facet_id_is_empty_docids =
+            env.create_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;

-        let field_id_docid_facet_f64s = env.create_database(Some(FIELD_ID_DOCID_FACET_F64S))?;
+        let field_id_docid_facet_f64s =
+            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
        let field_id_docid_facet_strings =
-            env.create_database(Some(FIELD_ID_DOCID_FACET_STRINGS))?;
-        let documents = env.create_database(Some(DOCUMENTS))?;
+            env.create_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
+        let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
+        wtxn.commit()?;

        Index::set_creation_dates(&env, main, created_at, updated_at)?;

@ -207,7 +215,6 @@ impl Index {
            exact_word_docids,
            word_prefix_docids,
            exact_word_prefix_docids,
-            docid_word_positions,
            word_pair_proximity_docids,
            script_language_docids,
            word_prefix_pair_proximity_docids,
@ -1032,16 +1039,15 @@ impl Index {

    /* documents */

-    /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
-    pub fn documents<'t>(
-        &self,
+    /// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
+    pub fn iter_documents<'a, 't: 'a>(
+        &'a self,
        rtxn: &'t RoTxn,
-        ids: impl IntoIterator<Item = DocumentId>,
-    ) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
+        ids: impl IntoIterator<Item = DocumentId> + 'a,
+    ) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
        let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
-        let mut documents = Vec::new();

-        for id in ids {
+        Ok(ids.into_iter().map(move |id| {
            if soft_deleted_documents.contains(id) {
                return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?;
            }
@ -1049,27 +1055,25 @@ impl Index {
                .documents
                .get(rtxn, &BEU32::new(id))?
                .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
-            documents.push((id, kv));
-        }
+            Ok((id, kv))
+        }))
+    }

-        Ok(documents)
+    /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
+    pub fn documents<'t>(
+        &self,
+        rtxn: &'t RoTxn,
+        ids: impl IntoIterator<Item = DocumentId>,
+    ) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
+        self.iter_documents(rtxn, ids)?.collect()
    }

    /// Returns an iterator over all the documents in the index.
-    pub fn all_documents<'t>(
-        &self,
+    pub fn all_documents<'a, 't: 'a>(
+        &'a self,
        rtxn: &'t RoTxn,
-    ) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReaderU16<'t>)>>> {
-        let soft_deleted_docids = self.soft_deleted_documents_ids(rtxn)?;
-
-        Ok(self
-            .documents
-            .iter(rtxn)?
-            // we cast the BEU32 to a DocumentId
-            .map(|document| document.map(|(id, obkv)| (id.get(), obkv)))
-            .filter(move |document| {
-                document.as_ref().map_or(true, |(id, _)| !soft_deleted_docids.contains(*id))
-            }))
+    ) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
+        self.iter_documents(rtxn, self.documents_ids(rtxn)?)
    }

    pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
@ -1462,9 +1466,9 @@ pub(crate) mod tests {

        db_snap!(index, field_distribution,
            @r###"
-        age              1     
-        id               2     
-        name             2     
+        age              1      |
+        id               2      |
+        name             2      |
        "###
        );

@ -1482,9 +1486,9 @@ pub(crate) mod tests {

        db_snap!(index, field_distribution,
            @r###"
-        age              1     
-        id               2     
-        name             2     
+        age              1      |
+        id               2      |
+        name             2      |
        "###
        );

@ -1498,9 +1502,9 @@ pub(crate) mod tests {

        db_snap!(index, field_distribution,
            @r###"
-        has_dog          1     
-        id               2     
-        name             2     
+        has_dog          1      |
+        id               2      |
+        name             2      |
        "###
        );
    }
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -5,52 +5,6 @@
 #[global_allocator]
 pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

-// #[cfg(test)]
-// pub mod allocator {
-//     use std::alloc::{GlobalAlloc, System};
-//     use std::sync::atomic::{self, AtomicI64};
-
-//     #[global_allocator]
-//     pub static ALLOC: CountingAlloc = CountingAlloc {
-//         max_resident: AtomicI64::new(0),
-//         resident: AtomicI64::new(0),
-//         allocated: AtomicI64::new(0),
-//     };
-
-//     pub struct CountingAlloc {
-//         pub max_resident: AtomicI64,
-//         pub resident: AtomicI64,
-//         pub allocated: AtomicI64,
-//     }
-//     unsafe impl GlobalAlloc for CountingAlloc {
-//         unsafe fn alloc(&self, layout: std::alloc::Layout) -> *mut u8 {
-//             self.allocated.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
-//             let old_resident =
-//                 self.resident.fetch_add(layout.size() as i64, atomic::Ordering::SeqCst);
-
-//             let resident = old_resident + layout.size() as i64;
-//             self.max_resident.fetch_max(resident, atomic::Ordering::SeqCst);
-
-//             // if layout.size() > 1_000_000 {
-//             //     eprintln!(
-//             //         "allocating {} with new resident size: {resident}",
-//             //         layout.size() / 1_000_000
-//             //     );
-//             //     // let trace = std::backtrace::Backtrace::capture();
-//             //     // let t = trace.to_string();
-//             //     // eprintln!("{t}");
-//             // }
-
-//             System.alloc(layout)
-//         }
-
-//         unsafe fn dealloc(&self, ptr: *mut u8, layout: std::alloc::Layout) {
-//             self.resident.fetch_sub(layout.size() as i64, atomic::Ordering::Relaxed);
-//             System.dealloc(ptr, layout)
-//         }
-//     }
-// }
-
 #[macro_use]
 pub mod documents;

--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@ -5,6 +5,7 @@ use std::ops::Bound::{self, Excluded, Included};
 use either::Either;
 pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
 use roaring::RoaringBitmap;
+use serde_json::Value;

 use super::facet_range_search;
 use crate::error::{Error, UserError};
@ -112,6 +113,52 @@ impl<'a> From<Filter<'a>> for FilterCondition<'a> {
 }

 impl<'a> Filter<'a> {
+    pub fn from_json(facets: &'a Value) -> Result<Option<Self>> {
+        match facets {
+            Value::String(expr) => {
+                let condition = Filter::from_str(expr)?;
+                Ok(condition)
+            }
+            Value::Array(arr) => Self::parse_filter_array(arr),
+            v => Err(Error::UserError(UserError::InvalidFilterExpression(
+                &["String", "Array"],
+                v.clone(),
+            ))),
+        }
+    }
+
+    fn parse_filter_array(arr: &'a [Value]) -> Result<Option<Self>> {
+        let mut ands = Vec::new();
+        for value in arr {
+            match value {
+                Value::String(s) => ands.push(Either::Right(s.as_str())),
+                Value::Array(arr) => {
+                    let mut ors = Vec::new();
+                    for value in arr {
+                        match value {
+                            Value::String(s) => ors.push(s.as_str()),
+                            v => {
+                                return Err(Error::UserError(UserError::InvalidFilterExpression(
+                                    &["String"],
+                                    v.clone(),
+                                )))
+                            }
+                        }
+                    }
+                    ands.push(Either::Left(ors));
+                }
+                v => {
+                    return Err(Error::UserError(UserError::InvalidFilterExpression(
+                        &["String", "[String]"],
+                        v.clone(),
+                    )))
+                }
+            }
+        }
+
+        Filter::from_array(ands)
+    }
+
    pub fn from_array<I, J>(array: I) -> Result<Option<Self>>
    where
        I: IntoIterator<Item = Either<J, &'a str>>,
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@ -201,12 +201,14 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {

 #[cfg(test)]
 mod test {
+    #[allow(unused_imports)]
    use super::*;
-    use crate::index::tests::TempIndex;

-    #[cfg(feature = "default")]
+    #[cfg(feature = "japanese")]
    #[test]
    fn test_kanji_language_detection() {
+        use crate::index::tests::TempIndex;
+
        let index = TempIndex::new();

        index
--- a/milli/src/search/new/distinct.rs
+++ b/milli/src/search/new/distinct.rs
@ -26,7 +26,6 @@ pub fn apply_distinct_rule(
    ctx: &mut SearchContext,
    field_id: u16,
    candidates: &RoaringBitmap,
-    // TODO: add a universe here, such that the `excluded` are a subset of the universe?
 ) -> Result<DistinctOutput> {
    let mut excluded = RoaringBitmap::new();
    let mut remaining = RoaringBitmap::new();
--- a/milli/src/search/new/exact_attribute.rs
+++ b/milli/src/search/new/exact_attribute.rs
@ -206,7 +206,7 @@ impl State {
            )?;
            intersection &= &candidates;
            if !intersection.is_empty() {
-                // TODO: although not really worth it in terms of performance,
+                // Although not really worth it in terms of performance,
                // if would be good to put this in cache for the sake of consistency
                let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
                    ctx.index
--- a/milli/src/search/new/graph_based_ranking_rule.rs
+++ b/milli/src/search/new/graph_based_ranking_rule.rs
@ -46,7 +46,7 @@ use super::logger::SearchLogger;
 use super::query_graph::QueryNode;
 use super::ranking_rule_graph::{
    ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph,
-    RankingRuleGraph, RankingRuleGraphTrait, TypoGraph,
+    RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph,
 };
 use super::small_bitmap::SmallBitmap;
 use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
@ -54,6 +54,12 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::ranking_rule_graph::PathVisitor;
 use crate::{Result, TermsMatchingStrategy};

+pub type Words = GraphBasedRankingRule<WordsGraph>;
+impl GraphBasedRankingRule<WordsGraph> {
+    pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
+        Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
+    }
+}
 pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
 impl GraphBasedRankingRule<ProximityGraph> {
    pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
--- a/milli/src/search/new/interner.rs
+++ b/milli/src/search/new/interner.rs
@ -32,7 +32,7 @@ impl<T> Interned<T> {
 #[derive(Clone)]
 pub struct DedupInterner<T> {
    stable_store: Vec<T>,
-    lookup: FxHashMap<T, Interned<T>>, // TODO: Arc
+    lookup: FxHashMap<T, Interned<T>>,
 }
 impl<T> Default for DedupInterner<T> {
    fn default() -> Self {
--- a/milli/src/search/new/limits.rs
+++ b/milli/src/search/new/limits.rs
@ -1,5 +1,4 @@
 /// Maximum number of tokens we consider in a single search.
-// TODO: Loic, find proper value here so we don't overflow the interner.
 pub const MAX_TOKEN_COUNT: usize = 1_000;

 /// Maximum number of prefixes that can be derived from a single word.
--- a/milli/src/search/new/logger/visual.rs
+++ b/milli/src/search/new/logger/visual.rs
@ -4,7 +4,6 @@ use std::io::{BufWriter, Write};
 use std::path::{Path, PathBuf};
 use std::time::Instant;

-// use rand::random;
 use roaring::RoaringBitmap;

 use crate::search::new::interner::Interned;
@ -13,6 +12,7 @@ use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::search::new::ranking_rule_graph::{
    Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition,
    ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
+    WordsCondition, WordsGraph,
 };
 use crate::search::new::ranking_rules::BoxRankingRule;
 use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
@ -24,11 +24,12 @@ pub enum SearchEvents {
    RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
    RankingRuleEndIteration { ranking_rule_idx: usize, universe_len: u64 },
    ExtendResults { new: Vec<u32> },
-    WordsGraph { query_graph: QueryGraph },
    ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
    ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
    TypoGraph { graph: RankingRuleGraph<TypoGraph> },
    TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
+    WordsGraph { graph: RankingRuleGraph<WordsGraph> },
+    WordsPaths { paths: Vec<Vec<Interned<WordsCondition>>> },
    FidGraph { graph: RankingRuleGraph<FidGraph> },
    FidPaths { paths: Vec<Vec<Interned<FidCondition>>> },
    PositionGraph { graph: RankingRuleGraph<PositionGraph> },
@ -139,8 +140,11 @@ impl SearchLogger<QueryGraph> for VisualSearchLogger {
        let Some(location) = self.location.last() else { return };
        match location {
            Location::Words => {
-                if let Some(query_graph) = state.downcast_ref::<QueryGraph>() {
-                    self.events.push(SearchEvents::WordsGraph { query_graph: query_graph.clone() });
+                if let Some(graph) = state.downcast_ref::<RankingRuleGraph<WordsGraph>>() {
+                    self.events.push(SearchEvents::WordsGraph { graph: graph.clone() });
+                }
+                if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<WordsCondition>>>>() {
+                    self.events.push(SearchEvents::WordsPaths { paths: paths.clone() });
                }
            }
            Location::Typo => {
@ -329,7 +333,6 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
            SearchEvents::ExtendResults { new } => {
                self.write_extend_results(new)?;
            }
-            SearchEvents::WordsGraph { query_graph } => self.write_words_graph(query_graph)?,
            SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
            SearchEvents::ProximityPaths { paths } => {
                self.write_rr_graph_paths::<ProximityGraph>(paths)?;
@ -338,6 +341,10 @@ impl<'ctx> DetailedLoggerFinish<'ctx> {
            SearchEvents::TypoPaths { paths } => {
                self.write_rr_graph_paths::<TypoGraph>(paths)?;
            }
+            SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?,
+            SearchEvents::WordsPaths { paths } => {
+                self.write_rr_graph_paths::<WordsGraph>(paths)?;
+            }
            SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?,
            SearchEvents::FidPaths { paths } => {
                self.write_rr_graph_paths::<FidGraph>(paths)?;
@ -455,7 +462,7 @@ fill: \"#B6E2D3\"
                shape: class
                max_nbr_typo: {}",
                    term_subset.description(ctx),
-                    term_subset.max_nbr_typos(ctx)
+                    term_subset.max_typo_cost(ctx)
                )?;

                for w in term_subset.all_single_words_except_prefix_db(ctx)? {
@ -482,13 +489,6 @@ fill: \"#B6E2D3\"
        }
        Ok(())
    }
-    fn write_words_graph(&mut self, qg: QueryGraph) -> Result<()> {
-        self.make_new_file_for_internal_state_if_needed()?;
-
-        self.write_query_graph(&qg)?;
-
-        Ok(())
-    }
    fn write_rr_graph<R: RankingRuleGraphTrait>(
        &mut self,
        graph: &RankingRuleGraph<R>,
--- a/milli/src/search/new/matches/matching_words.rs
+++ b/milli/src/search/new/matches/matching_words.rs
@ -52,7 +52,7 @@ impl MatchingWords {
            words.push(LocatedMatchingWords {
                value: matching_words,
                positions: located_term.positions.clone(),
-                is_prefix: term.is_cached_prefix(),
+                is_prefix: term.is_prefix(),
                original_char_count: term.original_word(&ctx).chars().count(),
            });
        }
@ -244,6 +244,8 @@ pub(crate) mod tests {
        temp_index
            .add_documents(documents!([
                { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
+                { "id": 2, "name": "Westfália" },
+                { "id": 3, "name": "Ŵôřlḑôle" },
            ]))
            .unwrap();
        temp_index
@ -305,7 +307,7 @@ pub(crate) mod tests {
                    ..Default::default()
                })
                .next(),
-            None
+            Some(MatchType::Full { char_len: 5, ids: &(2..=2) })
        );
        assert_eq!(
            matching_words
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@ -499,17 +499,36 @@ mod tests {
    use charabia::TokenizerBuilder;
    use matching_words::tests::temp_index_with_documents;

-    use super::super::located_query_terms_from_tokens;
    use super::*;
-    use crate::SearchContext;
+    use crate::index::tests::TempIndex;
+    use crate::{execute_search, SearchContext};

    impl<'a> MatcherBuilder<'a, &[u8]> {
-        pub fn new_test(mut ctx: SearchContext, query: &'a str) -> Self {
-            let tokenizer = TokenizerBuilder::new().build();
-            let tokens = tokenizer.tokenize(query);
-            let query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
-            let matching_words = MatchingWords::new(ctx, query_terms);
-            Self::new(matching_words, TokenizerBuilder::new().build())
+        fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
+            let mut ctx = SearchContext::new(index, rtxn);
+            let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
+                &mut ctx,
+                &Some(query.to_string()),
+                crate::TermsMatchingStrategy::default(),
+                false,
+                &None,
+                &None,
+                crate::search::new::GeoSortStrategy::default(),
+                0,
+                100,
+                Some(10),
+                &mut crate::DefaultSearchLogger,
+                &mut crate::DefaultSearchLogger,
+            )
+            .unwrap();
+
+            // consume context and located_query_terms to build MatchingWords.
+            let matching_words = match located_query_terms {
+                Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
+                None => MatchingWords::default(),
+            };
+
+            MatcherBuilder::new(matching_words, TokenizerBuilder::new().build())
        }
    }

@ -517,8 +536,7 @@ mod tests {
    fn format_identity() {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "split the world");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");

        let format_options = FormatOptions { highlight: false, crop: None };

@ -545,8 +563,7 @@ mod tests {
    fn format_highlight() {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "split the world");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");

        let format_options = FormatOptions { highlight: true, crop: None };

@ -589,8 +606,7 @@ mod tests {
    fn highlight_unicode() {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "world");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
        let format_options = FormatOptions { highlight: true, crop: None };

        // Text containing prefix match.
@ -599,7 +615,7 @@ mod tests {
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
-            @"<em>Ŵôřlḑôle</em>"
+            @"<em>Ŵôřlḑ</em>ôle"
        );

        // Text containing unicode match.
@ -611,8 +627,7 @@ mod tests {
            @"<em>Ŵôřlḑ</em>"
        );

-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "westfali");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
        let format_options = FormatOptions { highlight: true, crop: None };

        // Text containing unicode match.
@ -621,7 +636,7 @@ mod tests {
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
-            @"<em>Westfália</em>"
+            @"<em>Westfáli</em>a"
        );
    }

@ -629,8 +644,7 @@ mod tests {
    fn format_crop() {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "split the world");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");

        let format_options = FormatOptions { highlight: false, crop: Some(10) };

@ -727,8 +741,7 @@ mod tests {
    fn format_highlight_crop() {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "split the world");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");

        let format_options = FormatOptions { highlight: true, crop: Some(10) };

@ -790,8 +803,7 @@ mod tests {
        //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let builder = MatcherBuilder::new_test(ctx, "split the world");
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");

        let text = "void void split the world void void.";

@ -827,8 +839,8 @@ mod tests {
    fn partial_matches() {
        let temp_index = temp_index_with_documents();
        let rtxn = temp_index.read_txn().unwrap();
-        let ctx = SearchContext::new(&temp_index, &rtxn);
-        let mut builder = MatcherBuilder::new_test(ctx, "the \"t he\" door \"do or\"");
+        let mut builder =
+            MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
        builder.highlight_prefix("_".to_string());
        builder.highlight_suffix("_".to_string());

--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -15,11 +15,7 @@ mod resolve_query_graph;
 mod small_bitmap;

 mod exact_attribute;
-// TODO: documentation + comments
-// implementation is currently an adaptation of the previous implementation to fit with the new model
 mod sort;
-// TODO: documentation + comments
-mod words;

 #[cfg(test)]
 mod tests;
@ -43,10 +39,10 @@ use ranking_rules::{
 use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
 use roaring::RoaringBitmap;
 use sort::Sort;
-use words::Words;

 use self::geo_sort::GeoSort;
 pub use self::geo_sort::Strategy as GeoSortStrategy;
+use self::graph_based_ranking_rule::Words;
 use self::interner::Interned;
 use crate::search::new::distinct::apply_distinct_rule;
 use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError};
@ -202,6 +198,11 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
    let mut sorted_fields = HashSet::new();
    let mut geo_sorted = false;

+    // Don't add the `words` ranking rule if the term matching strategy is `All`
+    if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
+        words = true;
+    }
+
    let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
    let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
    for rr in settings_ranking_rules {
@ -397,8 +398,8 @@ pub fn execute_search(
        None
    };
    let bucket_sort_output = if let Some(query_terms) = query_terms {
-        let graph = QueryGraph::from_query(ctx, &query_terms)?;
-        located_query_terms = Some(query_terms);
+        let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
+        located_query_terms = Some(new_located_query_terms);

        let ranking_rules = get_ranking_rules_for_query_graph_search(
            ctx,
--- a/milli/src/search/new/query_graph.rs
+++ b/milli/src/search/new/query_graph.rs
@ -88,29 +88,33 @@ pub struct QueryGraph {
 }

 impl QueryGraph {
-    /// Build the query graph from the parsed user search query.
+    /// Build the query graph from the parsed user search query and return it together with an updated list of the
+    /// located query terms, which also contains the ngrams.
    pub fn from_query(
        ctx: &mut SearchContext,
-        // NOTE: the terms here must be consecutive
+        // The terms here must be consecutive
        terms: &[LocatedQueryTerm],
-    ) -> Result<QueryGraph> {
+    ) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
+        let mut new_located_query_terms = terms.to_vec();
+
        let nbr_typos = number_of_typos_allowed(ctx)?;

        let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
        let root_node = 0;
        let end_node = 1;

-        // TODO: we could consider generalizing to 4,5,6,7,etc. ngrams
+        // We could consider generalizing to 4,5,6,7,etc. ngrams
        let (mut prev2, mut prev1, mut prev0): (Vec<u16>, Vec<u16>, Vec<u16>) =
            (vec![], vec![], vec![root_node]);

        let original_terms_len = terms.len();
        for term_idx in 0..original_terms_len {
            let mut new_nodes = vec![];
+
            let new_node_idx = add_node(
                &mut nodes_data,
                QueryNodeData::Term(LocatedQueryTermSubset {
-                    term_subset: QueryTermSubset::full(Interned::from_raw(term_idx as u16)),
+                    term_subset: QueryTermSubset::full(terms[term_idx].value),
                    positions: terms[term_idx].positions.clone(),
                    term_ids: term_idx as u8..=term_idx as u8,
                }),
@ -121,6 +125,7 @@ impl QueryGraph {
                if let Some(ngram) =
                    query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
                {
+                    new_located_query_terms.push(ngram.clone());
                    let ngram_idx = add_node(
                        &mut nodes_data,
                        QueryNodeData::Term(LocatedQueryTermSubset {
@ -136,6 +141,7 @@ impl QueryGraph {
                if let Some(ngram) =
                    query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
                {
+                    new_located_query_terms.push(ngram.clone());
                    let ngram_idx = add_node(
                        &mut nodes_data,
                        QueryNodeData::Term(LocatedQueryTermSubset {
@ -167,7 +173,7 @@ impl QueryGraph {
        let mut graph = QueryGraph { root_node, end_node, nodes };
        graph.build_initial_edges();

-        Ok(graph)
+        Ok((graph, new_located_query_terms))
    }

    /// Remove the given nodes, connecting all their predecessors to all their successors.
--- a/milli/src/search/new/query_term/compute_derivations.rs
+++ b/milli/src/search/new/query_term/compute_derivations.rs
@ -28,16 +28,14 @@ pub enum ZeroOrOneTypo {
 impl Interned<QueryTerm> {
    pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
        let s = ctx.term_interner.get_mut(self);
-        if s.max_nbr_typos == 0 {
-            s.one_typo = Lazy::Init(OneTypoTerm::default());
-            s.two_typo = Lazy::Init(TwoTypoTerm::default());
-        } else if s.max_nbr_typos == 1 && s.one_typo.is_uninit() {
+        if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
            assert!(s.two_typo.is_uninit());
+            // Initialize one_typo subterm even if max_levenshtein_distance is 0 because of split words
            self.initialize_one_typo_subterm(ctx)?;
            let s = ctx.term_interner.get_mut(self);
            assert!(s.one_typo.is_init());
            s.two_typo = Lazy::Init(TwoTypoTerm::default());
-        } else if s.max_nbr_typos > 1 && s.two_typo.is_uninit() {
+        } else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() {
            assert!(s.two_typo.is_uninit());
            self.initialize_one_and_two_typo_subterm(ctx)?;
            let s = ctx.term_interner.get_mut(self);
@ -187,7 +185,7 @@ pub fn partially_initialized_term_from_word(
                original: ctx.word_interner.insert(word.to_owned()),
                ngram_words: None,
                is_prefix: false,
-                max_nbr_typos: 0,
+                max_levenshtein_distance: 0,
                zero_typo: <_>::default(),
                one_typo: Lazy::Init(<_>::default()),
                two_typo: Lazy::Init(<_>::default()),
@ -258,7 +256,7 @@ pub fn partially_initialized_term_from_word(
    Ok(QueryTerm {
        original: word_interned,
        ngram_words: None,
-        max_nbr_typos: max_typo,
+        max_levenshtein_distance: max_typo,
        is_prefix,
        zero_typo,
        one_typo: Lazy::Uninit,
@ -277,7 +275,16 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern
 impl Interned<QueryTerm> {
    fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
        let self_mut = ctx.term_interner.get_mut(self);
-        let QueryTerm { original, is_prefix, one_typo, .. } = self_mut;
+
+        let allows_split_words = self_mut.allows_split_words();
+        let QueryTerm {
+            original,
+            is_prefix,
+            one_typo,
+            max_levenshtein_distance: max_nbr_typos,
+            ..
+        } = self_mut;
+
        let original = *original;
        let is_prefix = *is_prefix;
        // let original_str = ctx.word_interner.get(*original).to_owned();
@ -286,26 +293,33 @@ impl Interned<QueryTerm> {
        }
        let mut one_typo_words = BTreeSet::new();

-        find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
-            match nbr_typos {
-                ZeroOrOneTypo::Zero => {}
-                ZeroOrOneTypo::One => {
-                    if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
-                        one_typo_words.insert(derived_word);
-                    } else {
-                        return Ok(ControlFlow::Break(()));
+        if *max_nbr_typos > 0 {
+            find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
+                match nbr_typos {
+                    ZeroOrOneTypo::Zero => {}
+                    ZeroOrOneTypo::One => {
+                        if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
+                            one_typo_words.insert(derived_word);
+                        } else {
+                            return Ok(ControlFlow::Break(()));
+                        }
                    }
                }
-            }
-            Ok(ControlFlow::Continue(()))
-        })?;
-        let original_str = ctx.word_interner.get(original).to_owned();
-        let split_words = find_split_words(ctx, original_str.as_str())?;
+                Ok(ControlFlow::Continue(()))
+            })?;
+        }
+
+        let split_words = if allows_split_words {
+            let original_str = ctx.word_interner.get(original).to_owned();
+            find_split_words(ctx, original_str.as_str())?
+        } else {
+            None
+        };

        let self_mut = ctx.term_interner.get_mut(self);

        // Only add the split words to the derivations if:
-        // 1. the term is not an ngram; OR
+        // 1. the term is neither an ngram nor a phrase; OR
        // 2. the term is an ngram, but the split words are different from the ngram's component words
        let split_words = if let Some((ngram_words, split_words)) =
            self_mut.ngram_words.as_ref().zip(split_words.as_ref())
@ -327,7 +341,13 @@ impl Interned<QueryTerm> {
    }
    fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
        let self_mut = ctx.term_interner.get_mut(self);
-        let QueryTerm { original, is_prefix, two_typo, .. } = self_mut;
+        let QueryTerm {
+            original,
+            is_prefix,
+            two_typo,
+            max_levenshtein_distance: max_nbr_typos,
+            ..
+        } = self_mut;
        let original_str = ctx.word_interner.get(*original).to_owned();
        if two_typo.is_init() {
            return Ok(());
@ -335,34 +355,37 @@ impl Interned<QueryTerm> {
        let mut one_typo_words = BTreeSet::new();
        let mut two_typo_words = BTreeSet::new();

-        find_zero_one_two_typo_derivations(
-            *original,
-            *is_prefix,
-            ctx.index.words_fst(ctx.txn)?,
-            &mut ctx.word_interner,
-            |derived_word, nbr_typos| {
-                if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
-                    && two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
-                {
-                    // No chance we will add either one- or two-typo derivations anymore, stop iterating.
-                    return Ok(ControlFlow::Break(()));
-                }
-                match nbr_typos {
-                    NumberOfTypos::Zero => {}
-                    NumberOfTypos::One => {
-                        if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
-                            one_typo_words.insert(derived_word);
+        if *max_nbr_typos > 0 {
+            find_zero_one_two_typo_derivations(
+                *original,
+                *is_prefix,
+                ctx.index.words_fst(ctx.txn)?,
+                &mut ctx.word_interner,
+                |derived_word, nbr_typos| {
+                    if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
+                        && two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
+                    {
+                        // No chance we will add either one- or two-typo derivations anymore, stop iterating.
+                        return Ok(ControlFlow::Break(()));
+                    }
+                    match nbr_typos {
+                        NumberOfTypos::Zero => {}
+                        NumberOfTypos::One => {
+                            if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
+                                one_typo_words.insert(derived_word);
+                            }
+                        }
+                        NumberOfTypos::Two => {
+                            if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
+                                two_typo_words.insert(derived_word);
+                            }
                        }
                    }
-                    NumberOfTypos::Two => {
-                        if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
-                            two_typo_words.insert(derived_word);
-                        }
-                    }
-                }
-                Ok(ControlFlow::Continue(()))
-            },
-        )?;
+                    Ok(ControlFlow::Continue(()))
+                },
+            )?;
+        }
+
        let split_words = find_split_words(ctx, original_str.as_str())?;
        let self_mut = ctx.term_interner.get_mut(self);

--- a/milli/src/search/new/query_term/mod.rs
+++ b/milli/src/search/new/query_term/mod.rs
@ -43,7 +43,7 @@ pub struct QueryTermSubset {
 pub struct QueryTerm {
    original: Interned<String>,
    ngram_words: Option<Vec<Interned<String>>>,
-    max_nbr_typos: u8,
+    max_levenshtein_distance: u8,
    is_prefix: bool,
    zero_typo: ZeroTypoTerm,
    // May not be computed yet
@ -132,7 +132,6 @@ impl QueryTermSubset {
        if full_query_term.ngram_words.is_some() {
            return None;
        }
-        // TODO: included in subset
        if let Some(phrase) = full_query_term.zero_typo.phrase {
            self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase))
        } else if let Some(word) = full_query_term.zero_typo.exact {
@ -182,7 +181,6 @@ impl QueryTermSubset {
        let word = match &self.zero_typo_subset {
            NTypoTermSubset::All => Some(use_prefix_db),
            NTypoTermSubset::Subset { words, phrases: _ } => {
-                // TODO: use a subset of prefix words instead
                if words.contains(&use_prefix_db) {
                    Some(use_prefix_db)
                } else {
@ -204,7 +202,6 @@ impl QueryTermSubset {
        ctx: &mut SearchContext,
    ) -> Result<BTreeSet<Word>> {
        let mut result = BTreeSet::default();
-        // TODO: a compute_partially funtion
        if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
            self.original.compute_fully_if_needed(ctx)?;
        }
@ -300,7 +297,6 @@ impl QueryTermSubset {
        let mut result = BTreeSet::default();

        if !self.one_typo_subset.is_empty() {
-            // TODO: compute less than fully if possible
            self.original.compute_fully_if_needed(ctx)?;
        }
        let original = ctx.term_interner.get_mut(self.original);
@ -342,10 +338,16 @@ impl QueryTermSubset {
        }
        None
    }
-    pub fn max_nbr_typos(&self, ctx: &SearchContext) -> u8 {
+    pub fn max_typo_cost(&self, ctx: &SearchContext) -> u8 {
        let t = ctx.term_interner.get(self.original);
-        match t.max_nbr_typos {
-            0 => 0,
+        match t.max_levenshtein_distance {
+            0 => {
+                if t.allows_split_words() {
+                    1
+                } else {
+                    0
+                }
+            }
            1 => {
                if self.one_typo_subset.is_empty() {
                    0
@ -438,6 +440,9 @@ impl QueryTerm {

        self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
    }
+    fn allows_split_words(&self) -> bool {
+        self.zero_typo.phrase.is_none()
+    }
 }

 impl Interned<QueryTerm> {
@ -470,6 +475,9 @@ impl QueryTerm {
    pub fn is_cached_prefix(&self) -> bool {
        self.zero_typo.use_prefix_db.is_some()
    }
+    pub fn is_prefix(&self) -> bool {
+        self.is_prefix
+    }
    pub fn original_word(&self, ctx: &SearchContext) -> String {
        ctx.word_interner.get(self.original).clone()
    }
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens(
                }
            }
            TokenKind::Separator(separator_kind) => {
-                match separator_kind {
-                    SeparatorKind::Hard => {
-                        position += 1;
-                    }
-                    SeparatorKind::Soft => {
-                        position += 0;
-                    }
+                // add penalty for hard separators
+                if let SeparatorKind::Hard = separator_kind {
+                    position = position.wrapping_add(7);
                }

                phrase = 'phrase: {
@ -143,7 +139,6 @@ pub fn number_of_typos_allowed<'ctx>(
    let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
    let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;

-    // TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms?
    let exact_words = ctx.index.exact_words(ctx.txn)?;

    Ok(Box::new(move |word: &str| {
@ -217,7 +212,7 @@ pub fn make_ngram(
        original: ngram_str_interned,
        ngram_words: Some(words_interned),
        is_prefix,
-        max_nbr_typos,
+        max_levenshtein_distance: max_nbr_typos,
        zero_typo: term.zero_typo,
        one_typo: Lazy::Uninit,
        two_typo: Lazy::Uninit,
@ -254,8 +249,6 @@ impl PhraseBuilder {
        } else {
            // token has kind Word
            let word = ctx.word_interner.insert(token.lemma().to_string());
-            // TODO: in a phrase, check that every word exists
-            // otherwise return an empty term
            self.words.push(Some(word));
        }
    }
@ -271,7 +264,7 @@ impl PhraseBuilder {
                QueryTerm {
                    original: ctx.word_interner.insert(phrase_desc),
                    ngram_words: None,
-                    max_nbr_typos: 0,
+                    max_levenshtein_distance: 0,
                    is_prefix: false,
                    zero_typo: ZeroTypoTerm {
                        phrase: Some(phrase),
@ -288,3 +281,36 @@ impl PhraseBuilder {
        })
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use charabia::TokenizerBuilder;
+
+    use super::*;
+    use crate::index::tests::TempIndex;
+
+    fn temp_index_with_documents() -> TempIndex {
+        let temp_index = TempIndex::new();
+        temp_index
+            .add_documents(documents!([
+                { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
+                { "id": 2, "name": "Westfália" },
+                { "id": 3, "name": "Ŵôřlḑôle" },
+            ]))
+            .unwrap();
+        temp_index
+    }
+
+    #[test]
+    fn start_with_hard_separator() -> Result<()> {
+        let tokenizer = TokenizerBuilder::new().build();
+        let tokens = tokenizer.tokenize(".");
+        let index = temp_index_with_documents();
+        let rtxn = index.read_txn()?;
+        let mut ctx = SearchContext::new(&index, &rtxn);
+        // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
+        let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
+        assert!(located_query_terms.is_empty());
+        Ok(())
+    }
+}
--- a/Show More
+++ b/Show More