Merge #4408

4408: Disable incremental facet update as a stop gap r=Kerollmops a=dureuill # Pull Request Temporarily disable incremental facet update to fix an issue related to out-of-order sorts. ## Related issue Workaround for #4409 ## What does this PR do? - Always use bulk insert to insert facet values Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Disable incremental facet update as a stop gap
2025-07-19 04:50:37 +00:00 · 2024-02-13 14:47:34 +00:00 · 2024-02-13 15:17:43 +01:00 · 2024-02-13 14:06:36 +00:00 · 2024-02-13 13:59:21 +00:00 · 2024-01-31 14:52:42 +00:00
325 changed files with 14699 additions and 11598 deletions
--- a/.github/ISSUE_TEMPLATE/sprint_issue.md
+++ b/.github/ISSUE_TEMPLATE/sprint_issue.md
@ -7,19 +7,17 @@ assignees: ''

 ---

-Related product team resources: [roadmap card]() (_internal only_) and [PRD]() (_internal only_)
+Related product team resources: [PRD]() (_internal only_)
 Related product discussion:
 Related spec: WIP

 ## Motivation

-<!---Copy/paste the information in the roadmap resources or briefly detail the product motivation. Ask product team if any hesitation.-->
+<!---Copy/paste the information in the PRD or briefly detail the product motivation. Ask product team if any hesitation.-->

 ## Usage

-<!---Write a quick description of the usage if the usage has already been defined-->
-
-Refer to the final spec to know the details and the final decisions about the usage.
+<!---Link to the public part of the PRD, or to the related product discussion for experimental features-->

 ## TODO

--- a/.github/workflows/benchmarks-manual.yml
+++ b/.github/workflows/benchmarks-manual.yml
@ -74,4 +74,4 @@ jobs:
          echo "${{ steps.file.outputs.basename }}.json has just been pushed."
          echo 'How to compare this benchmark with another one?'
          echo '  - Check the available files with: ./benchmarks/scripts/list.sh'
-          echo "  - Run the following command: ./benchmaks/scipts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
+          echo "  - Run the following command: ./benchmarks/scripts/compare.sh <file-to-compare-with> ${{ steps.file.outputs.basename }}.json"
--- a/.github/workflows/benchmarks-pr.yml
+++ b/.github/workflows/benchmarks-pr.yml
@ -0,0 +1,98 @@
+name: Benchmarks (PR)
+on: issue_comment
+permissions:
+  issues: write
+
+env:
+  GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+
+jobs:
+  run-benchmarks-on-comment:
+    if: startsWith(github.event.comment.body, '/benchmark')
+    name: Run and upload benchmarks
+    runs-on: benchmarks
+    timeout-minutes: 4320 # 72h
+    steps:
+      - uses: actions-rs/toolchain@v1
+        with:
+          profile: minimal
+          toolchain: stable
+          override: true
+
+      - name: Check for Command
+        id: command
+        uses: xt0rted/slash-command-action@v2
+        with:
+          command: benchmark
+          reaction-type: "eyes"
+          repo-token: ${{ env.GH_TOKEN }}
+
+      - uses: xt0rted/pull-request-comment-branch@v2
+        id: comment-branch
+        with:
+          repo_token: ${{ env.GH_TOKEN }}
+
+      - uses: actions/checkout@v3
+        if: success()
+        with:
+          fetch-depth: 0 # fetch full history to be able to get main commit sha
+          ref: ${{ steps.comment-branch.outputs.head_ref }}
+
+      # Set variables
+      - name: Set current branch name
+        shell: bash
+        run: echo "name=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_OUTPUT
+        id: current_branch
+      - name: Set normalized current branch name # Replace `/` by `_` in branch name to avoid issues when pushing to S3
+        shell: bash
+        run: echo "name=$(git rev-parse --abbrev-ref HEAD | tr '/' '_')" >> $GITHUB_OUTPUT
+        id: normalized_current_branch
+      - name: Set shorter commit SHA
+        shell: bash
+        run: echo "short=$(echo $GITHUB_SHA | cut -c1-8)" >> $GITHUB_OUTPUT
+        id: commit_sha
+      - name: Set file basename with format "dataset_branch_commitSHA"
+        shell: bash
+        run: echo "basename=$(echo ${{ steps.command.outputs.command-arguments }}_${{ steps.normalized_current_branch.outputs.name }}_${{ steps.commit_sha.outputs.short }})" >> $GITHUB_OUTPUT
+        id: file
+
+      # Run benchmarks
+      - name: Run benchmarks - Dataset ${{ steps.command.outputs.command-arguments }} - Branch ${{ steps.current_branch.outputs.name }} - Commit ${{ steps.commit_sha.outputs.short }}
+        run: |
+          cd benchmarks
+          cargo bench --bench ${{ steps.command.outputs.command-arguments }} -- --save-baseline ${{ steps.file.outputs.basename }}
+
+      # Generate critcmp files
+      - name: Install critcmp
+        uses: taiki-e/install-action@v2
+        with:
+          tool: critcmp
+      - name: Export critcmp file
+        run: |
+          critcmp --export ${{ steps.file.outputs.basename }} > ${{ steps.file.outputs.basename }}.json
+
+      # Upload benchmarks
+      - name: Upload ${{ steps.file.outputs.basename }}.json to DO Spaces # DigitalOcean Spaces = S3
+        uses: BetaHuhn/do-spaces-action@v2
+        with:
+          access_key: ${{ secrets.DO_SPACES_ACCESS_KEY }}
+          secret_key: ${{ secrets.DO_SPACES_SECRET_KEY }}
+          space_name: ${{ secrets.DO_SPACES_SPACE_NAME }}
+          space_region: ${{ secrets.DO_SPACES_SPACE_REGION }}
+          source: ${{ steps.file.outputs.basename }}.json
+          out_dir: critcmp_results
+
+      # Compute the diff of the benchmarks and send a message on the GitHub PR
+      - name: Compute and send a message in the PR
+        env:
+          GITHUB_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+        run: |
+          set -x
+          export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
+          export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
+          export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
+          echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
+          echo '```' >> body.txt
+          ./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
+          echo '```' >> body.txt
+          gh pr comment ${{ steps.current_branch.outputs.name }} --body-file body.txt
--- a/.github/workflows/publish-apt-brew-pkg.yml
+++ b/.github/workflows/publish-apt-brew-pkg.yml
@ -50,7 +50,7 @@ jobs:
    needs: check-version
    steps:
      - name: Create PR to Homebrew
-        uses: mislav/bump-homebrew-formula-action@v2
+        uses: mislav/bump-homebrew-formula-action@v3
        with:
          formula-name: meilisearch
          formula-path: Formula/m/meilisearch.rb
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@ -57,20 +57,20 @@ jobs:
          echo "date=$commit_date" >> $GITHUB_OUTPUT

      - name: Set up QEMU
-        uses: docker/setup-qemu-action@v2
+        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Docker meta
        id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v5
        with:
          images: getmeili/meilisearch
          # Prevent `latest` to be updated for each new tag pushed.
@ -83,7 +83,7 @@ jobs:
            type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}

      - name: Build and push
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v5
        with:
          push: true
          platforms: linux/amd64,linux/arm64
--- a/.github/workflows/sdks-tests.yml
+++ b/.github/workflows/sdks-tests.yml
@ -160,7 +160,7 @@ jobs:
        with:
          repository: meilisearch/meilisearch-js
      - name: Setup node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
        with:
          cache: 'yarn'
      - name: Install dependencies
@ -318,7 +318,7 @@ jobs:
        with:
          repository: meilisearch/meilisearch-js-plugins
      - name: Setup node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
        with:
          cache: yarn
      - name: Install dependencies
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@ -43,7 +43,7 @@ jobs:
          toolchain: nightly
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@ -65,7 +65,7 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@ -149,7 +149,7 @@ jobs:
          toolchain: stable
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@ -168,7 +168,7 @@ jobs:
          override: true
          components: clippy
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
@ -187,7 +187,7 @@ jobs:
          override: true
          components: rustfmt
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@ -2,6 +2,7 @@
 resolver = "2"
 members = [
    "meilisearch",
+    "meilitool",
    "meilisearch-types",
    "meilisearch-auth",
    "meili-snap",
@ -18,7 +19,7 @@ members = [
 ]

 [workspace.package]
-version = "1.4.0"
+version = "1.6.2"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
--- a/13
+++ b/13
@ -1,9 +1,9 @@
 # Compile
-FROM    rust:alpine3.16 AS compiler
+FROM    rust:1.71.1-alpine3.18 AS compiler

 RUN     apk add -q --update-cache --no-cache build-base openssl-dev

-WORKDIR /meilisearch
+WORKDIR /

 ARG     COMMIT_SHA
 ARG     COMMIT_DATE
@ -17,7 +17,7 @@ RUN     set -eux; \
        if [ "$apkArch" = "aarch64" ]; then \
            export JEMALLOC_SYS_WITH_LG_PAGE=16; \
        fi && \
-        cargo build --release
+        cargo build --release -p meilisearch -p meilitool

 # Run
 FROM    alpine:3.16
@ -28,9 +28,10 @@ ENV     MEILI_SERVER_PROVIDER docker
 RUN     apk update --quiet \
        && apk add -q --no-cache libgcc tini curl

-# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
-# to find.
-COPY    --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
+# add meilisearch and meilitool to `/bin` so you can run them from anywhere
+# and they're easy to find.
+COPY    --from=compiler /target/release/meilisearch /bin/meilisearch
+COPY    --from=compiler /target/release/meilitool /bin/meilitool
 # To stay compatible with the older version of the container (pre v0.27.0) we're
 # going to symlink the meilisearch binary in the path to `/meilisearch`
 RUN     ln -s /bin/meilisearch /meilisearch
--- a/PROFILING.md
+++ b/PROFILING.md
@ -1,14 +1,14 @@
 # Profiling Meilisearch

-Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options.
+Search engine technologies are complex pieces of software that require thorough profiling tools. We chose to use [Puffin](https://github.com/EmbarkStudios/puffin), which the Rust gaming industry uses extensively. You can export and import the profiling reports using the top bar's _File_ menu options [in Puffin Viewer](https://github.com/embarkstudios/puffin#ui).

 ![An example profiling with Puffin viewer](assets/profiling-example.png)

 ## Profiling the Indexing Process

-When you enable the `profile-with-puffin` feature of Meilisearch, a Puffin HTTP server will run on Meilisearch and listen on the default _0.0.0.0:8585_ address. This server will record a "frame" whenever it executes the `IndexScheduler::tick` method.
+When you enable [the `exportPuffinReports` experimental feature](https://www.meilisearch.com/docs/learn/experimental/overview) of Meilisearch, Puffin reports with the `.puffin` extension will be automatically exported to disk. When this option is enabled, the engine will automatically create a "frame" whenever it executes the `IndexScheduler::tick` method.

-Once your Meilisearch is running and awaits new indexation operations, you must [install and run the `puffin_viewer` tool](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) to see the profiling results. I advise you to run the viewer with the `RUST_LOG=puffin_http::client=debug` environment variable to see the client trying to connect to your server.
+[Puffin Viewer](https://github.com/EmbarkStudios/puffin/tree/main/puffin_viewer) is used to analyze the reports. Those reports show areas where Meilisearch spent time during indexing.

 Another piece of advice on the Puffin viewer UI is to consider the _Merge children with same ID_ option. It can hide the exact timings at which events were sent. Please turn it off when you see strange gaps on the Flamegraph. It can help.

--- a/benchmarks/benches/indexing.rs
+++ b/benchmarks/benches/indexing.rs
@ -6,9 +6,7 @@ use std::path::Path;

 use criterion::{criterion_group, criterion_main, Criterion};
 use milli::heed::{EnvOpenOptions, RwTxn};
-use milli::update::{
-    DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
-};
+use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
 use milli::Index;
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
@ -38,7 +36,7 @@ fn setup_index() -> Index {
 }

 fn setup_settings<'t>(
-    wtxn: &mut RwTxn<'t, '_>,
+    wtxn: &mut RwTxn<'t>,
    index: &'t Index,
    primary_key: &str,
    searchable_fields: &[&str],
@ -266,17 +264,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
@ -613,17 +601,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
@ -875,22 +853,31 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
 }

+fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
+    let mut wtxn = index.write_txn().unwrap();
+
+    let indexer_config = IndexerConfig::default();
+    for ids in document_ids_to_delete {
+        let config = IndexDocumentsConfig::default();
+
+        let mut builder =
+            IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
+                .unwrap();
+        (builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
+        builder.execute().unwrap();
+    }
+
+    wtxn.commit().unwrap();
+
+    index.prepare_for_closing().wait();
+}
+
 fn indexing_movies_in_three_batches(c: &mut Criterion) {
    let mut group = c.benchmark_group("indexing");
    group.sample_size(BENCHMARK_ITERATION);
@ -1112,17 +1099,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
@ -1338,17 +1315,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
--- a/config.toml
+++ b/config.toml
@ -129,3 +129,6 @@ experimental_enable_metrics = false

 # Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
 experimental_reduce_indexing_memory_usage = false
+
+# Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
+# experimental_max_number_of_batched_tasks = 100
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@ -267,6 +267,7 @@ pub(crate) mod test {
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
+            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::Set(FacetingSettings {
                max_values_per_facet: Setting::Set(111),
@ -275,6 +276,7 @@ pub(crate) mod test {
                ),
            }),
            pagination: Setting::NotSet,
+            embedders: Setting::NotSet,
            _kind: std::marker::PhantomData,
        };
        settings.check()
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@ -345,6 +345,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
            dictionary: v6::Setting::NotSet,
            synonyms: settings.synonyms.into(),
            distinct_attribute: settings.distinct_attribute.into(),
+            proximity_precision: v6::Setting::NotSet,
            typo_tolerance: match settings.typo_tolerance {
                v5::Setting::Set(typo) => v6::Setting::Set(v6::TypoTolerance {
                    enabled: typo.enabled.into(),
@ -377,6 +378,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                v5::Setting::Reset => v6::Setting::Reset,
                v5::Setting::NotSet => v6::Setting::NotSet,
            },
+            embedders: v6::Setting::NotSet,
            _kind: std::marker::PhantomData,
        }
    }
--- a/dump/src/reader/mod.rs
+++ b/dump/src/reader/mod.rs
@ -13,12 +13,12 @@ use crate::{Result, Version};

 mod compat;

-pub(self) mod v1;
-pub(self) mod v2;
-pub(self) mod v3;
-pub(self) mod v4;
-pub(self) mod v5;
-pub(self) mod v6;
+mod v1;
+mod v2;
+mod v3;
+mod v4;
+mod v5;
+mod v6;

 pub type Document = serde_json::Map<String, serde_json::Value>;
 pub type UpdateFile = dyn Iterator<Item = Result<Document>>;
@ -526,12 +526,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@ -541,12 +541,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@ -571,12 +571,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@ -617,12 +617,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@ -632,12 +632,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@ -647,12 +647,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-11.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-11.snap
@ -1,24 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: spells.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-5.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-5.snap
@ -1,38 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: products.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {
-    "android": [
-      "phone",
-      "smartphone"
-    ],
-    "iphone": [
-      "phone",
-      "smartphone"
-    ],
-    "phone": [
-      "android",
-      "iphone",
-      "smartphone"
-    ]
-  },
-  "distinctAttribute": null
-}
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-8.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-8.snap
@ -1,31 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: movies.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [
-    "genres",
-    "id"
-  ],
-  "sortableAttributes": [
-    "genres",
-    "id"
-  ],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness",
-    "release_date:asc"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
--- a/dump/src/reader/v1/settings.rs
+++ b/dump/src/reader/v1/settings.rs
@ -56,8 +56,7 @@ pub enum RankingRule {
    Desc(String),
 }

-static ASC_DESC_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
+static ASC_DESC_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap());

 impl FromStr for RankingRule {
    type Err = ();
--- a/dump/src/reader/v2/mod.rs
+++ b/dump/src/reader/v2/mod.rs
@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
 pub type Unchecked = settings::Unchecked;

 pub type Task = updates::UpdateEntry;
+pub type Kind = updates::UpdateMeta;

 // everything related to the errors
 pub type ResponseError = errors::ResponseError;
@ -107,8 +108,11 @@ impl V2Reader {
    pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
        Ok(self.index_uuid.iter().map(|index| -> Result<_> {
            V2IndexReader::new(
-                index.uid.clone(),
                &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
+                index,
+                BufReader::new(
+                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
+                ),
            )
        }))
    }
@ -143,16 +147,41 @@ pub struct V2IndexReader {
 }

 impl V2IndexReader {
-    pub fn new(name: String, path: &Path) -> Result<Self> {
+    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
        let meta = File::open(path.join("meta.json"))?;
        let meta: DumpMeta = serde_json::from_reader(meta)?;

+        let mut created_at = None;
+        let mut updated_at = None;
+
+        for line in tasks.lines() {
+            let task: Task = serde_json::from_str(&line?)?;
+            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
+                continue;
+            }
+
+            let new_created_at = match task.update.meta() {
+                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
+                _ => None,
+            };
+            let new_updated_at = task.update.finished_at();
+
+            if created_at.is_none() || created_at > new_created_at {
+                created_at = new_created_at;
+            }
+
+            if updated_at.is_none() || updated_at < new_updated_at {
+                updated_at = new_updated_at;
+            }
+        }
+
+        let current_time = OffsetDateTime::now_utc();
+
        let metadata = IndexMetadata {
-            uid: name,
+            uid: index_uuid.uid.clone(),
            primary_key: meta.primary_key,
-            // FIXME: Iterate over the whole task queue to find the creation and last update date.
-            created_at: OffsetDateTime::now_utc(),
-            updated_at: OffsetDateTime::now_utc(),
+            created_at: created_at.unwrap_or(current_time),
+            updated_at: updated_at.unwrap_or(current_time),
        };

        let ret = V2IndexReader {
@ -248,12 +277,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@ -263,12 +292,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@ -293,12 +322,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@ -340,12 +369,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@ -355,12 +384,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@ -370,12 +399,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

--- a/dump/src/reader/v2/updates.rs
+++ b/dump/src/reader/v2/updates.rs
@ -227,4 +227,14 @@ impl UpdateStatus {
            _ => None,
        }
    }
+
+    /// Returns the time at which this update finished, if it has finished.
+    ///
+    /// Only the `Processed` and `Failed` variants yield a timestamp (their
+    /// `processed_at` and `failed_at` fields respectively); `Processing`,
+    /// `Enqueued` and `Aborted` updates return `None`.
+    pub fn finished_at(&self) -> Option<OffsetDateTime> {
+        match self {
+            UpdateStatus::Processing(_) => None,
+            UpdateStatus::Enqueued(_) => None,
+            UpdateStatus::Processed(u) => Some(u.processed_at),
+            UpdateStatus::Aborted(_) => None,
+            UpdateStatus::Failed(u) => Some(u.failed_at),
+        }
+    }
 }
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@ -564,10 +564,10 @@ pub mod tests {

    #[test]
    fn parse_escaped() {
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\'"), @r#"{title} = {foo\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
+        insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
        // but it also works with other sequencies
        insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
    }
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@ -270,8 +270,8 @@ pub mod test {
            ("aaaa", "", rtok("", "aaaa"), "aaaa"),
            (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
            (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
-            (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
-            (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
+            (r"aa\\\aa", r#""#, rtok("", r"aa\\\aa"), r"aa\\\aa"),
+            (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r"aa\\"), r"aa\\"),
            (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
            (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
        ];
@ -301,12 +301,12 @@ pub mod test {
        );
        // simple quote
        assert_eq!(
-            unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
+            unescape(Span::new_extra(r"Hello \'World\'", ""), '\''),
            r#"Hello 'World'"#.to_string()
        );
        assert_eq!(
-            unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
-            r#"Hello \\'World\\'"#.to_string()
+            unescape(Span::new_extra(r"Hello \\\'World\\\'", ""), '\''),
+            r"Hello \\'World\\'".to_string()
        );
    }

@ -335,19 +335,19 @@ pub mod test {
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
-            (r#""\\""#, r#"\"#, true),
-            (r#""\\\\\\""#, r#"\\\"#, true),
-            (r#""aa\\aa""#, r#"aa\aa"#, true),
+            (r#""\\""#, r"\", true),
+            (r#""\\\\\\""#, r"\\\", true),
+            (r#""aa\\aa""#, r"aa\aa", true),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
            (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
-            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
-            (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
+            (r"'Hello \'world\''", r#"Hello 'world'"#, true),
+            (r"'Hello \\\'world\\\''", r"Hello \'world\'", true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
-            (r#"'\'\''"#, r#"''"#, true),
+            (r"'\'\''", r#"''"#, true),
        ];

        for (input, expected, escaped) in test_case {
--- a/fuzzers/src/bin/fuzz-indexing.rs
+++ b/fuzzers/src/bin/fuzz-indexing.rs
@ -113,7 +113,7 @@ fn main() {
                            index.documents(&wtxn, res.documents_ids).unwrap();
                            progression.fetch_add(1, Ordering::Relaxed);
                        }
-                        wtxn.abort().unwrap();
+                        wtxn.abort();
                    });
                    if let err @ Err(_) = handle.join() {
                        stop.store(true, Ordering::Relaxed);
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@ -18,11 +18,12 @@ derive_builder = "0.12.0"
 dump = { path = "../dump" }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
+flate2 = "1.0.28"
 log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
-puffin = "0.16.0"
+puffin = { version = "0.16.0", features = ["serialization"] }
 roaring = { version = "0.10.1", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
@ -30,6 +31,7 @@ synchronoise = "1.0.1"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
 time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
+ureq = "2.9.1"
 uuid = { version = "1.3.1", features = ["serde", "v4"] }

 [dev-dependencies]
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@ -19,20 +19,20 @@ one indexing operation.

 use std::collections::{BTreeSet, HashSet};
 use std::ffi::OsStr;
+use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;

 use dump::IndexMetadata;
-use log::{debug, error, info};
+use log::{debug, error, info, trace};
 use meilisearch_types::error::Code;
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
 use meilisearch_types::milli::heed::CompactionOption;
 use meilisearch_types::milli::update::{
-    DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
-    Settings as MilliSettings,
+    IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
 };
-use meilisearch_types::milli::{self, Filter, BEU32};
+use meilisearch_types::milli::{self, Filter};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
 use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
@ -43,7 +43,7 @@ use uuid::Uuid;

 use crate::autobatcher::{self, BatchKind};
 use crate::utils::{self, swap_index_uid_in_task};
-use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
+use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};

 /// Represents a combination of tasks that can all be processed at the same time.
 ///
@ -104,12 +104,6 @@ pub(crate) enum IndexOperation {
        operations: Vec<DocumentOperation>,
        tasks: Vec<Task>,
    },
-    DocumentDeletion {
-        index_uid: String,
-        // The vec associated with each document deletion tasks.
-        documents: Vec<Vec<String>>,
-        tasks: Vec<Task>,
-    },
    IndexDocumentDeletionByFilter {
        index_uid: String,
        task: Task,
@ -161,7 +155,6 @@ impl Batch {
            }
            Batch::IndexOperation { op, .. } => match op {
                IndexOperation::DocumentOperation { tasks, .. }
-                | IndexOperation::DocumentDeletion { tasks, .. }
                | IndexOperation::Settings { tasks, .. }
                | IndexOperation::DocumentClear { tasks, .. } => {
                    tasks.iter().map(|task| task.uid).collect()
@ -199,11 +192,33 @@ impl Batch {
    }
 }

+/// Short, human-readable label for a batch: the kind of batch, followed by the
+/// index it targets (when `index_uid()` returns one) and the ids of its tasks.
+impl fmt::Display for Batch {
+    /// A text used when we debug the profiling reports.
+    ///
+    /// The output is `<kind> on <index> from tasks: <ids>` when the batch has an
+    /// index uid, and `<kind> from tasks: <ids>` otherwise. The index name and
+    /// the task ids are written with their `Debug` formatting.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let index_uid = self.index_uid();
+        let tasks = self.ids();
+        // First the batch kind; index operations delegate to `IndexOperation`'s
+        // own `Display` implementation so the concrete operation is visible.
+        match self {
+            Batch::TaskCancelation { .. } => f.write_str("TaskCancelation")?,
+            Batch::TaskDeletion(_) => f.write_str("TaskDeletion")?,
+            Batch::SnapshotCreation(_) => f.write_str("SnapshotCreation")?,
+            Batch::Dump(_) => f.write_str("Dump")?,
+            Batch::IndexOperation { op, .. } => write!(f, "{op}")?,
+            Batch::IndexCreation { .. } => f.write_str("IndexCreation")?,
+            Batch::IndexUpdate { .. } => f.write_str("IndexUpdate")?,
+            Batch::IndexDeletion { .. } => f.write_str("IndexDeletion")?,
+            Batch::IndexSwap { .. } => f.write_str("IndexSwap")?,
+        };
+        // Then the suffix: the target index is only mentioned when there is one.
+        match index_uid {
+            Some(name) => f.write_fmt(format_args!(" on {name:?} from tasks: {tasks:?}")),
+            None => f.write_fmt(format_args!(" from tasks: {tasks:?}")),
+        }
+    }
+}
+
 impl IndexOperation {
    pub fn index_uid(&self) -> &str {
        match self {
            IndexOperation::DocumentOperation { index_uid, .. }
-            | IndexOperation::DocumentDeletion { index_uid, .. }
            | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
            | IndexOperation::DocumentClear { index_uid, .. }
            | IndexOperation::Settings { index_uid, .. }
@ -213,6 +228,27 @@ impl IndexOperation {
    }
 }

+/// Writes the name of the operation, prefixed with `IndexOperation::`.
+///
+/// Only the variant name is written; the fields of the operation (index uid,
+/// tasks, ...) are never included in the output.
+impl fmt::Display for IndexOperation {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // No catch-all arm: every variant is spelled out with its own label.
+        match self {
+            IndexOperation::DocumentOperation { .. } => {
+                f.write_str("IndexOperation::DocumentOperation")
+            }
+            IndexOperation::IndexDocumentDeletionByFilter { .. } => {
+                f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
+            }
+            IndexOperation::DocumentClear { .. } => f.write_str("IndexOperation::DocumentClear"),
+            IndexOperation::Settings { .. } => f.write_str("IndexOperation::Settings"),
+            IndexOperation::DocumentClearAndSetting { .. } => {
+                f.write_str("IndexOperation::DocumentClearAndSetting")
+            }
+            IndexOperation::SettingsAndDocumentOperation { .. } => {
+                f.write_str("IndexOperation::SettingsAndDocumentOperation")
+            }
+        }
+    }
+}
+
 impl IndexScheduler {
    /// Convert an [`BatchKind`](crate::autobatcher::BatchKind) into a [`Batch`].
    ///
@ -300,18 +336,27 @@ impl IndexScheduler {
            BatchKind::DocumentDeletion { deletion_ids } => {
                let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;

-                let mut documents = Vec::new();
+                let mut operations = Vec::with_capacity(tasks.len());
+                let mut documents_counts = Vec::with_capacity(tasks.len());
                for task in &tasks {
                    match task.kind {
                        KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
-                            documents.push(documents_ids.clone())
+                            operations.push(DocumentOperation::Delete(documents_ids.clone()));
+                            documents_counts.push(documents_ids.len() as u64);
                        }
                        _ => unreachable!(),
                    }
                }

                Ok(Some(Batch::IndexOperation {
-                    op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
+                    op: IndexOperation::DocumentOperation {
+                        index_uid,
+                        primary_key: None,
+                        method: IndexDocumentsMethod::ReplaceDocuments,
+                        documents_counts,
+                        operations,
+                        tasks,
+                    },
                    must_create_index,
                }))
            }
@ -539,7 +584,9 @@ impl IndexScheduler {
        let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;

        // If autobatching is disabled we only take one task at a time.
-        let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
+        // Otherwise, we take only a maximum of tasks to create batches.
+        let tasks_limit =
+            if self.autobatching_enabled { self.max_number_of_batched_tasks } else { 1 };

        let enqueued = index_tasks
            .into_iter()
@ -581,7 +628,7 @@ impl IndexScheduler {
            self.breakpoint(crate::Breakpoint::InsideProcessBatch);
        }

-        puffin::profile_function!(format!("{:?}", batch));
+        puffin::profile_function!(batch.to_string());

        match batch {
            Batch::TaskCancelation { mut task, previous_started_at, previous_processing_tasks } => {
@ -670,7 +717,7 @@ impl IndexScheduler {

                // 2. Snapshot the index-scheduler LMDB env
                //
-                // When we call copy_to_path, LMDB opens a read transaction by itself,
+                // When we call copy_to_file, LMDB opens a read transaction by itself,
                // we can't provide our own. It is an issue as we would like to know
                // the update files to copy but new ones can be enqueued between the copy
                // of the env and the new transaction we open to retrieve the enqueued tasks.
@ -683,7 +730,7 @@ impl IndexScheduler {
                // 2.1 First copy the LMDB env of the index-scheduler
                let dst = temp_snapshot_dir.path().join("tasks");
                fs::create_dir_all(&dst)?;
-                self.env.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+                self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;

                // 2.2 Create a read transaction on the index-scheduler
                let rtxn = self.env.read_txn()?;
@ -708,7 +755,7 @@ impl IndexScheduler {
                    let index = self.index_mapper.index(&rtxn, name)?;
                    let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
                    fs::create_dir_all(&dst)?;
-                    index.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+                    index.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
                }

                drop(rtxn);
@ -721,7 +768,7 @@ impl IndexScheduler {
                    .map_size(1024 * 1024 * 1024) // 1 GiB
                    .max_dbs(2)
                    .open(&self.auth_path)?;
-                auth.copy_to_path(dst.join("data.mdb"), CompactionOption::Enabled)?;
+                auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;

                // 5. Copy and tarball the flat snapshot
                // 5.1 Find the original name of the database
@ -777,6 +824,10 @@ impl IndexScheduler {
                // 2. dump the tasks
                let mut dump_tasks = dump.create_tasks_queue()?;
                for ret in self.all_tasks.iter(&rtxn)? {
+                    if self.must_stop_processing.get() {
+                        return Err(Error::AbortedTask);
+                    }
+
                    let (_, mut t) = ret?;
                    let status = t.status;
                    let content_file = t.content_uuid();
@ -797,6 +848,9 @@ impl IndexScheduler {

                    // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
                    if let Some(content_file) = content_file {
+                        if self.must_stop_processing.get() {
+                            return Err(Error::AbortedTask);
+                        }
                        if status == Status::Enqueued {
                            let content_file = self.file_store.get_update(content_file)?;

@ -836,6 +890,9 @@ impl IndexScheduler {

                    // 3.1. Dump the documents
                    for ret in index.all_documents(&rtxn)? {
+                        if self.must_stop_processing.get() {
+                            return Err(Error::AbortedTask);
+                        }
                        let (_id, doc) = ret?;
                        let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
                        index_dumper.push_document(&document)?;
@ -848,13 +905,16 @@ impl IndexScheduler {
                })?;

                // 4. Dump experimental feature settings
-                let features = self.features()?.runtime_features();
+                let features = self.features().runtime_features();
                dump.create_experimental_features(features)?;

                let dump_uid = started_at.format(format_description!(
                    "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
                )).unwrap();

+                if self.must_stop_processing.get() {
+                    return Err(Error::AbortedTask);
+                }
                let path = self.dumps_path.join(format!("{}.dump", dump_uid));
                let file = File::create(path)?;
                dump.persist_to(BufWriter::new(file))?;
@ -875,6 +935,10 @@ impl IndexScheduler {
                    self.index_mapper.index(&rtxn, &index_uid)?
                };

+                // the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
+                self.index_mapper
+                    .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
+
                let mut index_wtxn = index.write_txn()?;
                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
                index_wtxn.commit()?;
@ -1044,7 +1108,7 @@ impl IndexScheduler {
        for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
            let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
            swap_index_uid_in_task(&mut task, (lhs, rhs));
-            self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
+            self.all_tasks.put(wtxn, &task_id, &task)?;
        }

        // 4. remove the task from indexuid = before_name
@ -1070,7 +1134,7 @@ impl IndexScheduler {
    /// The list of processed tasks.
    fn apply_index_operation<'i>(
        &self,
-        index_wtxn: &mut RwTxn<'i, '_>,
+        index_wtxn: &mut RwTxn<'i>,
        index: &'i Index,
        operation: IndexOperation,
    ) -> Result<Vec<Task>> {
@ -1138,12 +1202,16 @@ impl IndexScheduler {

                let config = IndexDocumentsConfig { update_method: method, ..Default::default() };

+                let embedder_configs = index.embedding_configs(index_wtxn)?;
+                // TODO: consider Arc'ing the map too (we only need read access + we'll be cloning it multiple times, so really makes sense)
+                let embedders = self.embedders(embedder_configs)?;
+
                let mut builder = milli::update::IndexDocuments::new(
                    index_wtxn,
                    index,
                    indexer_config,
                    config,
-                    |indexing_step| debug!("update: {:?}", indexing_step),
+                    |indexing_step| trace!("update: {:?}", indexing_step),
                    || must_stop_processing.get(),
                )?;

@ -1156,6 +1224,8 @@ impl IndexScheduler {
                            let (new_builder, user_result) = builder.add_documents(reader)?;
                            builder = new_builder;

+                            builder = builder.with_embedders(embedders.clone());
+
                            let received_documents =
                                if let Some(Details::DocumentAdditionOrUpdate {
                                    received_documents,
@ -1190,7 +1260,8 @@ impl IndexScheduler {
                            let (new_builder, user_result) =
                                builder.remove_documents(document_ids)?;
                            builder = new_builder;
-
+                            // Invariant: `remove_documents` always returns `Ok` for the inner result, so this unwrap is not expected to fail.
+                            let count = user_result.unwrap();
                            let provided_ids =
                                if let Some(Details::DocumentDeletion { provided_ids, .. }) =
                                    task.details
@ -1201,23 +1272,11 @@ impl IndexScheduler {
                                    unreachable!();
                                };

-                            match user_result {
-                                Ok(count) => {
-                                    task.status = Status::Succeeded;
-                                    task.details = Some(Details::DocumentDeletion {
-                                        provided_ids,
-                                        deleted_documents: Some(count),
-                                    });
-                                }
-                                Err(e) => {
-                                    task.status = Status::Failed;
-                                    task.details = Some(Details::DocumentDeletion {
-                                        provided_ids,
-                                        deleted_documents: Some(0),
-                                    });
-                                    task.error = Some(milli::Error::from(e).into());
-                                }
-                            }
+                            task.status = Status::Succeeded;
+                            task.details = Some(Details::DocumentDeletion {
+                                provided_ids,
+                                deleted_documents: Some(count),
+                            });
                        }
                    }
                }
@ -1232,31 +1291,13 @@ impl IndexScheduler {
                        milli::update::Settings::new(index_wtxn, index, indexer_config);
                    builder.reset_primary_key();
                    builder.execute(
-                        |indexing_step| debug!("update: {:?}", indexing_step),
+                        |indexing_step| trace!("update: {:?}", indexing_step),
                        || must_stop_processing.clone().get(),
                    )?;
                }

                Ok(tasks)
            }
-            IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
-                let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
-                documents.iter().flatten().for_each(|id| {
-                    builder.delete_external_id(id);
-                });
-
-                let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
-
-                for (task, documents) in tasks.iter_mut().zip(documents) {
-                    task.status = Status::Succeeded;
-                    task.details = Some(Details::DocumentDeletion {
-                        provided_ids: documents.len(),
-                        deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
-                    });
-                }
-
-                Ok(tasks)
-            }
            IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
                let filter =
                    if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1266,7 +1307,13 @@ impl IndexScheduler {
                    } else {
                        unreachable!()
                    };
-                let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
+                let deleted_documents = delete_document_by_filter(
+                    index_wtxn,
+                    filter,
+                    self.index_mapper.indexer_config(),
+                    self.must_stop_processing.clone(),
+                    index,
+                );
                let original_filter = if let Some(Details::DocumentDeletionByFilter {
                    original_filter,
                    deleted_documents: _,
@ -1440,10 +1487,9 @@ impl IndexScheduler {
        }

        for task in to_delete_tasks.iter() {
-            self.all_tasks.delete(wtxn, &BEU32::new(task))?;
+            self.all_tasks.delete(wtxn, &task)?;
        }
        for canceled_by in affected_canceled_by {
-            let canceled_by = BEU32::new(canceled_by);
            if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
                tasks -= &to_delete_tasks;
                if tasks.is_empty() {
@ -1491,15 +1537,17 @@ impl IndexScheduler {
            task.details = task.details.map(|d| d.to_failed());
            self.update_task(wtxn, &task)?;
        }
-        self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;
+        self.canceled_by.put(wtxn, &cancel_task_id, &tasks_to_cancel)?;

        Ok(content_files_to_delete)
    }
 }

 fn delete_document_by_filter<'a>(
-    wtxn: &mut RwTxn<'a, '_>,
+    wtxn: &mut RwTxn<'a>,
    filter: &serde_json::Value,
+    indexer_config: &IndexerConfig,
+    must_stop_processing: MustStopProcessing,
    index: &'a Index,
 ) -> Result<u64> {
    let filter = Filter::from_json(filter)?;
@ -1510,9 +1558,26 @@ fn delete_document_by_filter<'a>(
            }
            e => e.into(),
        })?;
-        let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
-        delete_operation.delete_documents(&candidates);
-        delete_operation.execute().map(|result| result.deleted_documents)?
+
+        let config = IndexDocumentsConfig {
+            update_method: IndexDocumentsMethod::ReplaceDocuments,
+            ..Default::default()
+        };
+
+        let mut builder = milli::update::IndexDocuments::new(
+            wtxn,
+            index,
+            indexer_config,
+            config,
+            |indexing_step| debug!("update: {:?}", indexing_step),
+            || must_stop_processing.get(),
+        )?;
+
+        let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
+        builder = new_builder;
+
+        let _ = builder.execute()?;
+        count
    } else {
        0
    })
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@ -108,6 +108,8 @@ pub enum Error {
    TaskDeletionWithEmptyQuery,
    #[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
    TaskCancelationWithEmptyQuery,
+    #[error("Aborted task")]
+    AbortedTask,

    #[error(transparent)]
    Dump(#[from] dump::Error),
@ -175,6 +177,7 @@ impl Error {
            | Error::TaskNotFound(_)
            | Error::TaskDeletionWithEmptyQuery
            | Error::TaskCancelationWithEmptyQuery
+            | Error::AbortedTask
            | Error::Dump(_)
            | Error::Heed(_)
            | Error::Milli(_)
@ -236,6 +239,9 @@ impl ErrorCode for Error {
            Error::TaskDatabaseUpdate(_) => Code::Internal,
            Error::CreateBatch(_) => Code::Internal,

+            // This one should never be seen by the end user
+            Error::AbortedTask => Code::Internal,
+
            #[cfg(test)]
            Error::PlannedFailure => Code::Internal,
        }
--- a/index-scheduler/src/features.rs
+++ b/index-scheduler/src/features.rs
@ -1,6 +1,8 @@
+use std::sync::{Arc, RwLock};
+
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RwTxn};

 use crate::error::FeatureNotEnabledError;
 use crate::Result;
@ -9,20 +11,19 @@ const EXPERIMENTAL_FEATURES: &str = "experimental-features";

 #[derive(Clone)]
 pub(crate) struct FeatureData {
-    runtime: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
-    instance: InstanceTogglableFeatures,
+    persisted: Database<Str, SerdeJson<RuntimeTogglableFeatures>>,
+    runtime: Arc<RwLock<RuntimeTogglableFeatures>>,
 }

 #[derive(Debug, Clone, Copy)]
 pub struct RoFeatures {
    runtime: RuntimeTogglableFeatures,
-    instance: InstanceTogglableFeatures,
 }

 impl RoFeatures {
-    fn new(txn: RoTxn<'_>, data: &FeatureData) -> Result<Self> {
-        let runtime = data.runtime_features(txn)?;
-        Ok(Self { runtime, instance: data.instance })
+    fn new(data: &FeatureData) -> Self {
+        let runtime = data.runtime_features();
+        Self { runtime }
    }

    pub fn runtime_features(&self) -> RuntimeTogglableFeatures {
@ -43,39 +44,60 @@ impl RoFeatures {
    }

    pub fn check_metrics(&self) -> Result<()> {
-        if self.instance.metrics {
+        if self.runtime.metrics {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
                disabled_action: "Getting metrics",
                feature: "metrics",
-                issue_link: "https://github.com/meilisearch/meilisearch/discussions/3518",
+                issue_link: "https://github.com/meilisearch/product/discussions/625",
            }
            .into())
        }
    }

-    pub fn check_vector(&self) -> Result<()> {
+    pub fn check_vector(&self, disabled_action: &'static str) -> Result<()> {
        if self.runtime.vector_store {
            Ok(())
        } else {
            Err(FeatureNotEnabledError {
-                disabled_action: "Passing `vector` as a query parameter",
+                disabled_action,
                feature: "vector store",
                issue_link: "https://github.com/meilisearch/product/discussions/677",
            }
            .into())
        }
    }
+
+    /// Checks that the `export_puffin_reports` runtime feature is enabled.
+    ///
+    /// Returns `Ok(())` when it is; otherwise returns a `FeatureNotEnabledError`
+    /// (converted with `.into()`) that names the "export puffin reports" feature
+    /// and links to the related product discussion.
+    pub fn check_puffin(&self) -> Result<()> {
+        if self.runtime.export_puffin_reports {
+            Ok(())
+        } else {
+            Err(FeatureNotEnabledError {
+                disabled_action: "Outputting Puffin reports to disk",
+                feature: "export puffin reports",
+                issue_link: "https://github.com/meilisearch/product/discussions/693",
+            }
+            .into())
+        }
+    }
 }

 impl FeatureData {
    pub fn new(env: &Env, instance_features: InstanceTogglableFeatures) -> Result<Self> {
        let mut wtxn = env.write_txn()?;
-        let runtime_features = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
+        let runtime_features_db = env.create_database(&mut wtxn, Some(EXPERIMENTAL_FEATURES))?;
        wtxn.commit()?;

-        Ok(Self { runtime: runtime_features, instance: instance_features })
+        let txn = env.read_txn()?;
+        let persisted_features: RuntimeTogglableFeatures =
+            runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
+        let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
+            metrics: instance_features.metrics || persisted_features.metrics,
+            ..persisted_features
+        }));
+
+        Ok(Self { persisted: runtime_features_db, runtime })
    }

    pub fn put_runtime_features(
@ -83,16 +105,25 @@ impl FeatureData {
        mut wtxn: RwTxn,
        features: RuntimeTogglableFeatures,
    ) -> Result<()> {
-        self.runtime.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
+        self.persisted.put(&mut wtxn, EXPERIMENTAL_FEATURES, &features)?;
        wtxn.commit()?;
+
+        // safe to unwrap, the lock will only fail if:
+        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
+        // 2. there's a panic while the lock is held -> it is only used for an assignment here.
+        let mut toggled_features = self.runtime.write().unwrap();
+        *toggled_features = features;
        Ok(())
    }

-    fn runtime_features(&self, txn: RoTxn) -> Result<RuntimeTogglableFeatures> {
-        Ok(self.runtime.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default())
+    fn runtime_features(&self) -> RuntimeTogglableFeatures {
+        // sound to unwrap, the lock will only fail if:
+        // 1. requested by the same thread concurrently -> it is called and released in methods that don't call each other
+        // 2. there's a panic while the lock is held -> it is only used for copying the data here
+        *self.runtime.read().unwrap()
    }

-    pub fn features(&self, txn: RoTxn) -> Result<RoFeatures> {
-        RoFeatures::new(txn, self)
+    pub fn features(&self) -> RoFeatures {
+        RoFeatures::new(self)
    }
 }
--- a/index-scheduler/src/index_mapper/index_map.rs
+++ b/index-scheduler/src/index_mapper/index_map.rs
@ -1,12 +1,8 @@
-/// the map size to use when we don't succeed in reading it in indexes.
-const DEFAULT_MAP_SIZE: usize = 10 * 1024 * 1024 * 1024; // 10 GiB
-
 use std::collections::BTreeMap;
 use std::path::Path;
 use std::time::Duration;

-use meilisearch_types::heed::flags::Flags;
-use meilisearch_types::heed::{EnvClosingEvent, EnvOpenOptions};
+use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
 use meilisearch_types::milli::Index;
 use time::OffsetDateTime;
 use uuid::Uuid;
@ -236,7 +232,7 @@ impl IndexMap {
        enable_mdb_writemap: bool,
        map_size_growth: usize,
    ) {
-        let map_size = index.map_size().unwrap_or(DEFAULT_MAP_SIZE) + map_size_growth;
+        let map_size = index.map_size() + map_size_growth;
        let closing_event = index.prepare_for_closing();
        let generation = self.next_generation();
        self.unavailable.insert(
@ -309,7 +305,7 @@ fn create_or_open_index(
    options.map_size(clamp_to_page_size(map_size));
    options.max_readers(1024);
    if enable_mdb_writemap {
-        unsafe { options.flag(Flags::MdbWriteMap) };
+        unsafe { options.flags(EnvFlags::WRITE_MAP) };
    }

    if let Some((created, updated)) = date {
@ -388,7 +384,7 @@ mod tests {

    fn assert_index_size(index: Index, expected: usize) {
        let expected = clamp_to_page_size(expected);
-        let index_map_size = index.map_size().unwrap();
+        let index_map_size = index.map_size();
        assert_eq!(index_map_size, expected);
    }
 }
--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@ -69,6 +69,10 @@ pub struct IndexMapper {
    /// Whether we open a meilisearch index with the MDB_WRITEMAP option or not.
    enable_mdb_writemap: bool,
    pub indexer_config: Arc<IndexerConfig>,
+
+    /// A few types of long running batches of tasks that act on a single index set this field
+    /// so that a handle to the index is available from other threads (search) in an optimized manner.
+    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
 }

 /// Whether the index is available for use or is forbidden to be inserted back in the index map
@ -151,6 +155,7 @@ impl IndexMapper {
            index_growth_amount,
            enable_mdb_writemap,
            indexer_config: Arc::new(indexer_config),
+            currently_updating_index: Default::default(),
        })
    }

@ -303,6 +308,14 @@ impl IndexMapper {

    /// Return an index, may open it if it wasn't already opened.
    pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
+        if let Some((current_name, current_index)) =
+            self.currently_updating_index.read().unwrap().as_ref()
+        {
+            if current_name == name {
+                return Ok(current_index.clone());
+            }
+        }
+
        let uuid = self
            .index_mapping
            .get(rtxn, name)?
@ -474,4 +487,8 @@ impl IndexMapper {
    pub fn indexer_config(&self) -> &IndexerConfig {
        &self.indexer_config
    }
+
+    /// Replaces the index recorded as "currently updating".
+    ///
+    /// While `Some((name, index))` is set, `IndexMapper::index` returns a clone of
+    /// this handle for `name` without going through the index mapping. Passing
+    /// `None` clears the slot.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `currently_updating_index` lock is poisoned.
+    pub fn set_currently_updating_index(&self, index: Option<(String, Index)>) {
+        *self.currently_updating_index.write().unwrap() = index;
+    }
 }
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@ -1,7 +1,7 @@
 use std::collections::BTreeSet;
 use std::fmt::Write;

-use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
+use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, RoTxn};
 use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Details, Task};
@ -30,14 +30,19 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        index_mapper,
        features: _,
        max_number_of_tasks: _,
+        max_number_of_batched_tasks: _,
+        puffin_frame: _,
        wake_up: _,
        dumps_path: _,
        snapshots_path: _,
        auth_path: _,
        version_file_path: _,
+        webhook_url: _,
+        webhook_authorization_header: _,
        test_breakpoint_sdr: _,
        planned_failures: _,
        run_loop_iteration: _,
+        embedders: _,
    } = scheduler;

    let rtxn = env.read_txn().unwrap();
@ -113,7 +118,7 @@ pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
    snap
 }

-pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
+pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<BEU32, SerdeJson<Task>>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
@ -123,10 +128,7 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson
    snap
 }

-pub fn snapshot_date_db(
-    rtxn: &RoTxn,
-    db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
-) -> String {
+pub fn snapshot_date_db(rtxn: &RoTxn, db: Database<BEI128, CboRoaringBitmapCodec>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
@ -246,10 +248,7 @@ pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>)
    }
    snap
 }
-pub fn snapshot_canceled_by(
-    rtxn: &RoTxn,
-    db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
-) -> String {
+pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -27,12 +27,14 @@ mod index_mapper;
 mod insta_snapshot;
 mod lru;
 mod utils;
-mod uuid_codec;
+pub mod uuid_codec;

 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;

 use std::collections::{BTreeMap, HashMap};
+use std::fs::File;
+use std::io::{self, BufReader, Read};
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@ -44,14 +46,20 @@ use dump::{KindDump, TaskDump, UpdateFile};
 pub use error::Error;
 pub use features::RoFeatures;
 use file_store::FileStore;
+use flate2::bufread::GzEncoder;
+use flate2::Compression;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
-use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
-use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::byteorder::BE;
+use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
+use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
+use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
+use meilisearch_types::task_view::TaskView;
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
+use puffin::FrameView;
 use roaring::RoaringBitmap;
 use synchronoise::SignalEvent;
 use time::format_description::well_known::Rfc3339;
@ -62,8 +70,7 @@ use uuid::Uuid;
 use crate::index_mapper::IndexMapper;
 use crate::utils::{check_index_swap_validity, clamp_to_page_size};

-pub(crate) type BEI128 =
-    meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
+pub(crate) type BEI128 = I128<BE>;

 /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
 ///
@ -167,8 +174,8 @@ impl ProcessingTasks {
    }

    /// Set the processing tasks to an empty list
-    fn stop_processing(&mut self) {
-        self.processing = RoaringBitmap::new();
+    fn stop_processing(&mut self) -> RoaringBitmap {
+        std::mem::take(&mut self.processing)
    }

    /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@ -238,6 +245,10 @@ pub struct IndexSchedulerOptions {
    pub snapshots_path: PathBuf,
    /// The path to the folder containing the dumps.
    pub dumps_path: PathBuf,
+    /// The URL on which we must send the tasks statuses
+    pub webhook_url: Option<String>,
+    /// The value we will send into the Authorization HTTP header on the webhook URL
+    pub webhook_authorization_header: Option<String>,
    /// The maximum size, in bytes, of the task index.
    pub task_db_size: usize,
    /// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@ -256,6 +267,9 @@ pub struct IndexSchedulerOptions {
    /// The maximum number of tasks stored in the task queue before starting
    /// to auto schedule task deletions.
    pub max_number_of_tasks: usize,
+    /// If the autobatcher is allowed to automatically batch tasks
+    /// it will only batch this defined number of tasks at once.
+    pub max_number_of_batched_tasks: usize,
    /// The experimental features enabled for this instance.
    pub instance_features: InstanceTogglableFeatures,
 }
@ -276,7 +290,7 @@ pub struct IndexScheduler {
    pub(crate) file_store: FileStore,

    // The main database, it contains all the tasks accessible by their Id.
-    pub(crate) all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>>,
+    pub(crate) all_tasks: Database<BEU32, SerdeJson<Task>>,

    /// All the tasks ids grouped by their status.
    // TODO we should not be able to serialize a `Status::Processing` in this database.
@ -287,16 +301,16 @@ pub struct IndexScheduler {
    pub(crate) index_tasks: Database<Str, RoaringBitmapCodec>,

    /// Store the tasks that were canceled by a task uid
-    pub(crate) canceled_by: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
+    pub(crate) canceled_by: Database<BEU32, RoaringBitmapCodec>,

    /// Store the task ids of tasks which were enqueued at a specific date
-    pub(crate) enqueued_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    pub(crate) enqueued_at: Database<BEI128, CboRoaringBitmapCodec>,

    /// Store the task ids of finished tasks which started being processed at a specific date
-    pub(crate) started_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    pub(crate) started_at: Database<BEI128, CboRoaringBitmapCodec>,

    /// Store the task ids of tasks which finished at a specific date
-    pub(crate) finished_at: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    pub(crate) finished_at: Database<BEI128, CboRoaringBitmapCodec>,

    /// In charge of creating, opening, storing and returning indexes.
    pub(crate) index_mapper: IndexMapper,
@ -314,6 +328,17 @@ pub struct IndexScheduler {
    /// the finished tasks automatically.
    pub(crate) max_number_of_tasks: usize,

+    /// The maximum number of tasks that will be batched together.
+    pub(crate) max_number_of_batched_tasks: usize,
+
+    /// The webhook url we should send tasks to after processing every batch.
+    pub(crate) webhook_url: Option<String>,
+    /// The Authorization header to send to the webhook URL.
+    pub(crate) webhook_authorization_header: Option<String>,
+
+    /// A frame to output the indexation profiling files to disk.
+    pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
+
    /// The path used to create the dumps.
    pub(crate) dumps_path: PathBuf,

@ -326,6 +351,8 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

+    embedders: Arc<RwLock<HashMap<EmbedderOptions, Arc<Embedder>>>>,
+
    // ================= test
    // The next entry is dedicated to the tests.
    /// Provide a way to set a breakpoint in multiple part of the scheduler.
@ -364,10 +391,15 @@ impl IndexScheduler {
            wake_up: self.wake_up.clone(),
            autobatching_enabled: self.autobatching_enabled,
            max_number_of_tasks: self.max_number_of_tasks,
+            max_number_of_batched_tasks: self.max_number_of_batched_tasks,
+            puffin_frame: self.puffin_frame.clone(),
            snapshots_path: self.snapshots_path.clone(),
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
            version_file_path: self.version_file_path.clone(),
+            webhook_url: self.webhook_url.clone(),
+            webhook_authorization_header: self.webhook_authorization_header.clone(),
+            embedders: self.embedders.clone(),
            #[cfg(test)]
            test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
            #[cfg(test)]
@ -457,12 +489,17 @@ impl IndexScheduler {
            env,
            // we want to start the loop right away in case meilisearch was ctrl+Ced while processing things
            wake_up: Arc::new(SignalEvent::auto(true)),
+            puffin_frame: Arc::new(puffin::GlobalFrameView::default()),
            autobatching_enabled: options.autobatching_enabled,
            max_number_of_tasks: options.max_number_of_tasks,
+            max_number_of_batched_tasks: options.max_number_of_batched_tasks,
            dumps_path: options.dumps_path,
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
+            webhook_url: options.webhook_url,
+            webhook_authorization_header: options.webhook_authorization_header,
+            embedders: Default::default(),

            #[cfg(test)]
            test_breakpoint_sdr,
@ -572,17 +609,46 @@ impl IndexScheduler {
                run.wake_up.wait();

                loop {
+                    let puffin_enabled = run.features().check_puffin().is_ok();
+                    puffin::set_scopes_on(puffin_enabled);
+                    puffin::GlobalProfiler::lock().new_frame();
+
                    match run.tick() {
                        Ok(TickOutcome::TickAgain(_)) => (),
                        Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
                        Err(e) => {
-                            log::error!("{}", e);
+                            log::error!("{e}");
                            // Wait one second when an irrecoverable error occurs.
                            if !e.is_recoverable() {
                                std::thread::sleep(Duration::from_secs(1));
                            }
                        }
                    }
+
+                    // Let's write the previous frame to disk but only if
+                    // the user wanted to profile with puffin.
+                    if puffin_enabled {
+                        let mut frame_view = run.puffin_frame.lock();
+                        if !frame_view.is_empty() {
+                            let now = OffsetDateTime::now_utc();
+                            let mut file = match File::create(format!("{}.puffin", now)) {
+                                Ok(file) => file,
+                                Err(e) => {
+                                    log::error!("{e}");
+                                    continue;
+                                }
+                            };
+                            if let Err(e) = frame_view.save_to_writer(&mut file) {
+                                log::error!("{e}");
+                            }
+                            if let Err(e) = file.sync_all() {
+                                log::error!("{e}");
+                            }
+                            // We erase this frame view as it is no more useful. We want to
+                            // measure the new frames now that we exported the previous ones.
+                            *frame_view = FrameView::default();
+                        }
+                    }
                }
            })
            .unwrap();
@ -681,9 +747,7 @@ impl IndexScheduler {
        if let Some(canceled_by) = &query.canceled_by {
            let mut all_canceled_tasks = RoaringBitmap::new();
            for cancel_task_uid in canceled_by {
-                if let Some(canceled_by_uid) =
-                    self.canceled_by.get(rtxn, &BEU32::new(*cancel_task_uid))?
-                {
+                if let Some(canceled_by_uid) = self.canceled_by.get(rtxn, cancel_task_uid)? {
                    all_canceled_tasks |= canceled_by_uid;
                }
            }
@ -934,7 +998,7 @@ impl IndexScheduler {

        // if the task doesn't delete anything and 50% of the task queue is full, we must refuse to enqueue the incomming task
        if !matches!(&kind, KindWithContent::TaskDeletion { tasks, .. } if !tasks.is_empty())
-            && (self.env.non_free_pages_size()? * 100) / self.env.map_size()? as u64 > 50
+            && (self.env.non_free_pages_size()? * 100) / self.env.info().map_size as u64 > 50
        {
            return Err(Error::NoSpaceLeftInTaskQueue);
        }
@ -960,7 +1024,7 @@ impl IndexScheduler {
        // Get rid of the mutability.
        let task = task;

-        self.all_tasks.append(&mut wtxn, &BEU32::new(task.uid), &task)?;
+        self.all_tasks.put_with_flags(&mut wtxn, PutFlags::APPEND, &task.uid, &task)?;

        for index in task.indexes() {
            self.update_index(&mut wtxn, index, |bitmap| {
@ -1062,8 +1126,6 @@ impl IndexScheduler {
            self.breakpoint(Breakpoint::Start);
        }

-        puffin::GlobalProfiler::lock().new_frame();
-
        self.cleanup_task_queue()?;

        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1099,6 +1161,9 @@ impl IndexScheduler {
            handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
        };

+        // Reset the currently updating index to relinquish the index handle
+        self.index_mapper.set_currently_updating_index(None);
+
        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;

@ -1133,10 +1198,11 @@ impl IndexScheduler {
            // If we have an abortion error we must stop the tick here and re-schedule tasks.
            Err(Error::Milli(milli::Error::InternalError(
                milli::InternalError::AbortedIndexation,
-            ))) => {
+            )))
+            | Err(Error::AbortedTask) => {
                #[cfg(test)]
                self.breakpoint(Breakpoint::AbortedIndexation);
-                wtxn.abort().map_err(Error::HeedTransaction)?;
+                wtxn.abort();

                // We make sure that we don't call `stop_processing` on the `processing_tasks`,
                // this is because we want to let the next tick call `create_next_batch` and keep
@ -1157,7 +1223,7 @@ impl IndexScheduler {
                let index_uid = index_uid.unwrap();
                // fixme: handle error more gracefully? not sure when this could happen
                self.index_mapper.resize_index(&wtxn, &index_uid)?;
-                wtxn.abort().map_err(Error::HeedTransaction)?;
+                wtxn.abort();

                return Ok(TickOutcome::TickAgain(0));
            }
@ -1189,19 +1255,99 @@ impl IndexScheduler {
            }
        }

-        self.processing_tasks.write().unwrap().stop_processing();
+        let processed = self.processing_tasks.write().unwrap().stop_processing();

        #[cfg(test)]
        self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;

        wtxn.commit().map_err(Error::HeedTransaction)?;

+        // We shouldn't crash the tick function if we can't send data to the webhook.
+        let _ = self.notify_webhook(&processed);
+
        #[cfg(test)]
        self.breakpoint(Breakpoint::AfterProcessing);

        Ok(TickOutcome::TickAgain(processed_tasks))
    }

+    /// Sends every task updated by the last batch to the webhook, if one is configured.
+    ///
+    /// Must be called once the task changes have been committed. The tasks listed in
+    /// `updated` are serialized as `TaskView`s, one JSON document per line, gzip-compressed
+    /// and streamed as the body of a single POST request to `webhook_url`. When
+    /// `webhook_authorization_header` is set, it is sent as the `Authorization` header.
+    ///
+    /// A failure while sending the request is only logged. An error is returned only when
+    /// the read transaction cannot be opened.
+    fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
+        if let Some(ref url) = self.webhook_url {
+            /// A `Read` adapter that serializes the tasks one at a time into `buffer`
+            /// and hands the bytes out to the caller.
+            struct TaskReader<'a, 'b> {
+                rtxn: &'a RoTxn<'a>,
+                index_scheduler: &'a IndexScheduler,
+                tasks: &'b mut roaring::bitmap::Iter<'b>,
+                /// Serialized form of the task currently being sent.
+                buffer: Vec<u8>,
+                /// Number of bytes of `buffer` already handed out.
+                written: usize,
+            }
+
+            impl<'a, 'b> Read for TaskReader<'a, 'b> {
+                fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
+                    // The previous task was fully sent: serialize the next one, if any.
+                    if self.buffer.is_empty() {
+                        match self.tasks.next() {
+                            None => return Ok(0),
+                            Some(task_id) => {
+                                let task = self
+                                    .index_scheduler
+                                    .get_task(self.rtxn, task_id)
+                                    .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
+                                    .ok_or_else(|| {
+                                        io::Error::new(
+                                            io::ErrorKind::Other,
+                                            Error::CorruptedTaskQueue,
+                                        )
+                                    })?;
+
+                                serde_json::to_writer(
+                                    &mut self.buffer,
+                                    &TaskView::from_task(&task),
+                                )?;
+                                self.buffer.push(b'\n');
+                            }
+                        }
+                    }
+
+                    // Hand out at most `buf.len()` bytes. A serialized task can be larger
+                    // than the destination; the remainder is returned by the next calls.
+                    // (`io::copy` into a too-small `&mut [u8]` fails with `WriteZero`.)
+                    let pending = &self.buffer[self.written..];
+                    let wrote = pending.len().min(buf.len());
+                    buf[..wrote].copy_from_slice(&pending[..wrote]);
+                    self.written += wrote;
+
+                    // we wrote everything and must refresh our buffer on the next call
+                    if self.written == self.buffer.len() {
+                        self.written = 0;
+                        self.buffer.clear();
+                    }
+
+                    Ok(wrote)
+                }
+            }
+
+            let rtxn = self.env.read_txn()?;
+
+            let task_reader = TaskReader {
+                rtxn: &rtxn,
+                index_scheduler: self,
+                tasks: &mut updated.into_iter(),
+                buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
+                written: 0,
+            };
+
+            let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
+            let request = ureq::post(url).set("Content-Encoding", "gzip");
+            let request = match &self.webhook_authorization_header {
+                Some(header) => request.set("Authorization", header),
+                None => request,
+            };
+
+            // Best effort: a webhook failure must not make the caller fail.
+            if let Err(e) = request.send(reader) {
+                log::error!("While sending data to the webhook: {e}");
+            }
+        }
+
+        Ok(())
+    }
+
    /// Register a task to cleanup the task queue if needed
    fn cleanup_task_queue(&self) -> Result<()> {
        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1259,9 +1405,8 @@ impl IndexScheduler {
        Ok(IndexStats { is_indexing, inner_stats: index_stats })
    }

-    pub fn features(&self) -> Result<RoFeatures> {
-        let rtxn = self.read_txn()?;
-        self.features.features(rtxn)
+    pub fn features(&self) -> RoFeatures {
+        self.features.features()
    }

    pub fn put_runtime_features(&self, features: RuntimeTogglableFeatures) -> Result<()> {
@ -1277,6 +1422,40 @@ impl IndexScheduler {
        }
    }

+    // TODO: consider using a type alias or a struct embedder/template
+    /// Builds the `EmbeddingConfigs` for the given named embedding configurations.
+    ///
+    /// For each `(name, config)` pair the prompt is converted with `try_into`, and the
+    /// embedder is looked up in `self.embedders` by its `embedder_options`. A cached
+    /// embedder is reused; a missing one is created with `Embedder::new` and stored in
+    /// the cache before being returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error raised while converting a prompt or creating an embedder,
+    /// converted into a `milli::Error`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `embedders` lock is poisoned.
+    pub fn embedders(
+        &self,
+        embedding_configs: Vec<(String, milli::vector::EmbeddingConfig)>,
+    ) -> Result<EmbeddingConfigs> {
+        let res: Result<_> = embedding_configs
+            .into_iter()
+            .map(|(name, milli::vector::EmbeddingConfig { embedder_options, prompt })| {
+                let prompt =
+                    Arc::new(prompt.try_into().map_err(meilisearch_types::milli::Error::from)?);
+                // optimistically return existing embedder
+                {
+                    let embedders = self.embedders.read().unwrap();
+                    if let Some(embedder) = embedders.get(&embedder_options) {
+                        return Ok((name, (embedder.clone(), prompt)));
+                    }
+                }
+
+                // add missing embedder
+                // NOTE(review): the read lock is released before the embedder is built, so
+                // two concurrent callers could presumably both build the same embedder; the
+                // later `insert` would then replace the earlier entry. Confirm this is fine.
+                let embedder = Arc::new(
+                    Embedder::new(embedder_options.clone())
+                        .map_err(meilisearch_types::milli::vector::Error::from)
+                        .map_err(meilisearch_types::milli::Error::from)?,
+                );
+                {
+                    let mut embedders = self.embedders.write().unwrap();
+                    embedders.insert(embedder_options, embedder.clone());
+                }
+                Ok((name, (embedder, prompt)))
+            })
+            .collect();
+        res.map(EmbeddingConfigs::new)
+    }
+
    /// Blocks the thread until the test handle asks to progress to/through this breakpoint.
    ///
    /// Two messages are sent through the channel for each breakpoint.
@ -1304,7 +1483,7 @@ impl IndexScheduler {

 pub struct Dump<'a> {
    index_scheduler: &'a IndexScheduler,
-    wtxn: RwTxn<'a, 'a>,
+    wtxn: RwTxn<'a>,

    indexes: HashMap<String, RoaringBitmap>,
    statuses: HashMap<Status, RoaringBitmap>,
@ -1419,7 +1598,7 @@ impl<'a> Dump<'a> {
            },
        };

-        self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
+        self.index_scheduler.all_tasks.put(&mut self.wtxn, &task.uid, &task)?;

        for index in task.indexes() {
            match self.indexes.get_mut(index) {
@ -1461,8 +1640,8 @@ impl<'a> Dump<'a> {
            }
        }

-        self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
-        self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
+        self.statuses.entry(task.status).or_default().insert(task.uid);
+        self.kinds.entry(task.kind.as_kind()).or_default().insert(task.uid);

        Ok(task)
    }
@ -1582,6 +1761,8 @@ mod tests {
                indexes_path: tempdir.path().join("indexes"),
                snapshots_path: tempdir.path().join("snapshots"),
                dumps_path: tempdir.path().join("dumps"),
+                webhook_url: None,
+                webhook_authorization_header: None,
                task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
                enable_mdb_writemap: false,
@ -1590,6 +1771,7 @@ mod tests {
                indexer_config,
                autobatching_enabled: true,
                max_number_of_tasks: 1_000_000,
+                max_number_of_batched_tasks: usize::MAX,
                instance_features: Default::default(),
            };
            configuration(&mut options);
@ -4290,4 +4472,26 @@ mod tests {
        }
        "###);
    }
+
+    /// Cancels a dump creation while it is being processed.
+    ///
+    /// The dump task (uid 0) is registered and the scheduler is advanced up to the
+    /// `InsideProcessBatch` breakpoint. A cancelation targeting task 0 is then registered;
+    /// the next breakpoint reached must be `AbortedIndexation`, after which the
+    /// cancelation is processed as one successful batch. The scheduler state is
+    /// snapshotted after each step.
+    #[test]
+    fn cancel_processing_dump() {
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+        let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
+        let dump_cancellation = KindWithContent::TaskCancelation {
+            query: "cancel dump".to_owned(),
+            tasks: RoaringBitmap::from_iter([0]),
+        };
+        let _ = index_scheduler.register(dump_creation).unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
+        // Stop while the dump batch is being processed.
+        handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
+
+        let _ = index_scheduler.register(dump_cancellation).unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
+
+        // The in-flight dump must be aborted rather than completed.
+        snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
+
+        handle.advance_one_successful_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
+    }
 }
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap
@ -0,0 +1,35 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,]
+----------------------------------------------------------------------
+### Kind:
+"dumpCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap
@ -0,0 +1,45 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
+1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
+----------------------------------------------------------------------
+### Status:
+enqueued []
+succeeded [1,]
+canceled [0,]
+----------------------------------------------------------------------
+### Kind:
+"taskCancelation" [1,]
+"dumpCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+1 [0,]
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Finished At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap
@ -0,0 +1,38 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[0,]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
+1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,1,]
+----------------------------------------------------------------------
+### Kind:
+"taskCancelation" [1,]
+"dumpCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@ -3,9 +3,9 @@
 use std::collections::{BTreeSet, HashSet};
 use std::ops::Bound;

-use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
+use meilisearch_types::heed::types::DecodeIgnore;
 use meilisearch_types::heed::{Database, RoTxn, RwTxn};
-use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
+use meilisearch_types::milli::CboRoaringBitmapCodec;
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
 use roaring::{MultiOps, RoaringBitmap};
 use time::OffsetDateTime;
@ -18,7 +18,7 @@ impl IndexScheduler {
    }

    pub(crate) fn last_task_id(&self, rtxn: &RoTxn) -> Result<Option<TaskId>> {
-        Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k.get() + 1))
+        Ok(self.all_tasks.remap_data_type::<DecodeIgnore>().last(rtxn)?.map(|(k, _)| k + 1))
    }

    pub(crate) fn next_task_id(&self, rtxn: &RoTxn) -> Result<TaskId> {
@ -26,7 +26,7 @@ impl IndexScheduler {
    }

    pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
-        Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
+        Ok(self.all_tasks.get(rtxn, &task_id)?)
    }

    /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@ -88,7 +88,7 @@ impl IndexScheduler {
            }
        }

-        self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
+        self.all_tasks.put(wtxn, &task.uid, task)?;
        Ok(())
    }

@ -169,11 +169,11 @@ impl IndexScheduler {

 pub(crate) fn insert_task_datetime(
    wtxn: &mut RwTxn,
-    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    database: Database<BEI128, CboRoaringBitmapCodec>,
    time: OffsetDateTime,
    task_id: TaskId,
 ) -> Result<()> {
-    let timestamp = BEI128::new(time.unix_timestamp_nanos());
+    let timestamp = time.unix_timestamp_nanos();
    let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
    task_ids.insert(task_id);
    database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
@ -182,11 +182,11 @@ pub(crate) fn insert_task_datetime(

 pub(crate) fn remove_task_datetime(
    wtxn: &mut RwTxn,
-    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    database: Database<BEI128, CboRoaringBitmapCodec>,
    time: OffsetDateTime,
    task_id: TaskId,
 ) -> Result<()> {
-    let timestamp = BEI128::new(time.unix_timestamp_nanos());
+    let timestamp = time.unix_timestamp_nanos();
    if let Some(mut existing) = database.get(wtxn, &timestamp)? {
        existing.remove(task_id);
        if existing.is_empty() {
@ -202,7 +202,7 @@ pub(crate) fn remove_task_datetime(
 pub(crate) fn keep_tasks_within_datetimes(
    rtxn: &RoTxn,
    tasks: &mut RoaringBitmap,
-    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
+    database: Database<BEI128, CboRoaringBitmapCodec>,
    after: Option<OffsetDateTime>,
    before: Option<OffsetDateTime>,
 ) -> Result<()> {
@ -213,8 +213,8 @@ pub(crate) fn keep_tasks_within_datetimes(
        (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
    };
    let mut collected_task_ids = RoaringBitmap::new();
-    let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
-    let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
+    let start = map_bound(start, |b| b.unix_timestamp_nanos());
+    let end = map_bound(end, |b| b.unix_timestamp_nanos());
    let iter = database.range(rtxn, &(start, end))?;
    for r in iter {
        let (_timestamp, task_ids) = r?;
@ -337,8 +337,6 @@ impl IndexScheduler {
        let rtxn = self.env.read_txn().unwrap();
        for task in self.all_tasks.iter(&rtxn).unwrap() {
            let (task_id, task) = task.unwrap();
-            let task_id = task_id.get();
-
            let task_index_uid = task.index_uid().map(ToOwned::to_owned);

            let Task {
@ -361,16 +359,13 @@ impl IndexScheduler {
                    .unwrap()
                    .contains(task.uid));
            }
-            let db_enqueued_at = self
-                .enqueued_at
-                .get(&rtxn, &BEI128::new(enqueued_at.unix_timestamp_nanos()))
-                .unwrap()
-                .unwrap();
+            let db_enqueued_at =
+                self.enqueued_at.get(&rtxn, &enqueued_at.unix_timestamp_nanos()).unwrap().unwrap();
            assert!(db_enqueued_at.contains(task_id));
            if let Some(started_at) = started_at {
                let db_started_at = self
                    .started_at
-                    .get(&rtxn, &BEI128::new(started_at.unix_timestamp_nanos()))
+                    .get(&rtxn, &started_at.unix_timestamp_nanos())
                    .unwrap()
                    .unwrap();
                assert!(db_started_at.contains(task_id));
@ -378,7 +373,7 @@ impl IndexScheduler {
            if let Some(finished_at) = finished_at {
                let db_finished_at = self
                    .finished_at
-                    .get(&rtxn, &BEI128::new(finished_at.unix_timestamp_nanos()))
+                    .get(&rtxn, &finished_at.unix_timestamp_nanos())
                    .unwrap()
                    .unwrap();
                assert!(db_finished_at.contains(task_id));
--- a/index-scheduler/src/uuid_codec.rs
+++ b/index-scheduler/src/uuid_codec.rs
@ -1,7 +1,7 @@
 use std::borrow::Cow;
 use std::convert::TryInto;

-use meilisearch_types::heed::{BytesDecode, BytesEncode};
+use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
 use uuid::Uuid;

 /// A heed codec for value of struct Uuid.
@ -10,15 +10,15 @@ pub struct UuidCodec;
 impl<'a> BytesDecode<'a> for UuidCodec {
    type DItem = Uuid;

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        bytes.try_into().ok().map(Uuid::from_bytes)
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        bytes.try_into().map(Uuid::from_bytes).map_err(Into::into)
    }
 }

 impl BytesEncode<'_> for UuidCodec {
    type EItem = Uuid;

-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        Some(Cow::Borrowed(item.as_bytes()))
+    fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
+        Ok(Cow::Borrowed(item.as_bytes()))
    }
 }
--- a/meilisearch-auth/src/store.rs
+++ b/meilisearch-auth/src/store.rs
@ -4,17 +4,20 @@ use std::collections::HashSet;
 use std::convert::{TryFrom, TryInto};
 use std::fs::create_dir_all;
 use std::path::Path;
+use std::result::Result as StdResult;
 use std::str;
 use std::str::FromStr;
 use std::sync::Arc;

 use hmac::{Hmac, Mac};
+use meilisearch_types::heed::BoxedError;
 use meilisearch_types::index_uid_pattern::IndexUidPattern;
 use meilisearch_types::keys::KeyId;
 use meilisearch_types::milli;
-use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
+use meilisearch_types::milli::heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
 use sha2::Sha256;
+use thiserror::Error;
 use time::OffsetDateTime;
 use uuid::fmt::Hyphenated;
 use uuid::Uuid;
@ -30,7 +33,7 @@ const KEY_ID_ACTION_INDEX_EXPIRATION_DB_NAME: &str = "keyid-action-index-expirat
 #[derive(Clone)]
 pub struct HeedAuthStore {
    env: Arc<Env>,
-    keys: Database<ByteSlice, SerdeJson<Key>>,
+    keys: Database<Bytes, SerdeJson<Key>>,
    action_keyid_index_expiration: Database<KeyIdActionCodec, SerdeJson<Option<OffsetDateTime>>>,
    should_close_on_drop: bool,
 }
@ -276,7 +279,7 @@ impl HeedAuthStore {
    fn delete_key_from_inverted_db(&self, wtxn: &mut RwTxn, key: &KeyId) -> Result<()> {
        let mut iter = self
            .action_keyid_index_expiration
-            .remap_types::<ByteSlice, DecodeIgnore>()
+            .remap_types::<Bytes, DecodeIgnore>()
            .prefix_iter_mut(wtxn, key.as_bytes())?;
        while iter.next().transpose()?.is_some() {
            // safety: we don't keep references from inside the LMDB database.
@ -294,23 +297,24 @@ pub struct KeyIdActionCodec;
 impl<'a> milli::heed::BytesDecode<'a> for KeyIdActionCodec {
    type DItem = (KeyId, Action, Option<&'a [u8]>);

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let (key_id_bytes, action_bytes) = try_split_array_at(bytes)?;
-        let (action_bytes, index) = match try_split_array_at(action_bytes)? {
-            (action, []) => (action, None),
-            (action, index) => (action, Some(index)),
-        };
+    fn bytes_decode(bytes: &'a [u8]) -> StdResult<Self::DItem, BoxedError> {
+        let (key_id_bytes, action_bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
+        let (&action_byte, index) =
+            match try_split_array_at(action_bytes).ok_or(SliceTooShortError)? {
+                ([action], []) => (action, None),
+                ([action], index) => (action, Some(index)),
+            };
        let key_id = Uuid::from_bytes(*key_id_bytes);
-        let action = Action::from_repr(u8::from_be_bytes(*action_bytes))?;
+        let action = Action::from_repr(action_byte).ok_or(InvalidActionError { action_byte })?;

-        Some((key_id, action, index))
+        Ok((key_id, action, index))
    }
 }

 impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
    type EItem = (&'a KeyId, &'a Action, Option<&'a [u8]>);

-    fn bytes_encode((key_id, action, index): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((key_id, action, index): &Self::EItem) -> StdResult<Cow<[u8]>, BoxedError> {
        let mut bytes = Vec::new();

        bytes.extend_from_slice(key_id.as_bytes());
@ -320,10 +324,20 @@ impl<'a> milli::heed::BytesEncode<'a> for KeyIdActionCodec {
            bytes.extend_from_slice(index);
        }

-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
    }
 }

+#[derive(Error, Debug)]
+#[error("the slice is too short")]
+pub struct SliceTooShortError;
+
+#[derive(Error, Debug)]
+#[error("cannot construct a valid Action from {action_byte}")]
+pub struct InvalidActionError {
+    pub action_byte: u8,
+}
+
 pub fn generate_key_as_hexa(uid: Uuid, master_key: &[u8]) -> String {
    // format uid as hyphenated allowing user to generate their own keys.
    let mut uid_buffer = [0; Hyphenated::LENGTH];
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@ -15,7 +15,7 @@ actix-web = { version = "4.3.1", default-features = false }
 anyhow = "1.0.70"
 convert_case = "0.6.0"
 csv = "1.2.1"
-deserr = { version = "0.6.0", features = ["actix-web"]}
+deserr = { version = "0.6.0", features = ["actix-web"] }
 either = { version = "1.8.1", features = ["serde"] }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
 japanese = ["milli/japanese"]
 # thai specialized tokenization
 thai = ["milli/thai"]
-
 # allow greek specialized tokenization
 greek = ["milli/greek"]
+# allow khmer specialized tokenization
+khmer = ["milli/khmer"]
--- a/meilisearch-types/src/deserr/mod.rs
+++ b/meilisearch-types/src/deserr/mod.rs
@ -188,3 +188,4 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError);
 merge_with_error_impl_take_error_message!(ParseTaskKindError);
 merge_with_error_impl_take_error_message!(ParseTaskStatusError);
 merge_with_error_impl_take_error_message!(IndexUidFormatError);
+merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -222,6 +222,8 @@ InvalidVectorsType                    , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentId                     , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentLimit                  , InvalidRequest       , BAD_REQUEST ;
 InvalidDocumentOffset                 , InvalidRequest       , BAD_REQUEST ;
+InvalidEmbedder                       , InvalidRequest       , BAD_REQUEST ;
+InvalidHybridQuery                    , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexLimit                     , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexOffset                    , InvalidRequest       , BAD_REQUEST ;
 InvalidIndexPrimaryKey                , InvalidRequest       , BAD_REQUEST ;
@ -233,6 +235,7 @@ InvalidSearchAttributesToRetrieve     , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchCropLength               , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ;
+InvalidSearchSemanticRatio            , InvalidRequest       , BAD_REQUEST ;
 InvalidFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchFilter                   , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchHighlightPostTag         , InvalidRequest       , BAD_REQUEST ;
@ -252,9 +255,11 @@ InvalidSearchShowRankingScoreDetails  , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchSort                     , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsDisplayedAttributes    , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsDistinctAttribute      , InvalidRequest       , BAD_REQUEST ;
+InvalidSettingsProximityPrecision     , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsFaceting               , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsFilterableAttributes   , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsPagination             , InvalidRequest       , BAD_REQUEST ;
+InvalidSettingsEmbedders              , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsRankingRules           , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSearchableAttributes   , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSortableAttributes     , InvalidRequest       , BAD_REQUEST ;
@ -294,15 +299,18 @@ MissingFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ;
 MissingIndexUid                       , InvalidRequest       , BAD_REQUEST ;
 MissingMasterKey                      , Auth                 , UNAUTHORIZED ;
 MissingPayload                        , InvalidRequest       , BAD_REQUEST ;
+MissingSearchHybrid                   , InvalidRequest       , BAD_REQUEST ;
 MissingSwapIndexes                    , InvalidRequest       , BAD_REQUEST ;
 MissingTaskFilters                    , InvalidRequest       , BAD_REQUEST ;
 NoSpaceLeftOnDevice                   , System               , UNPROCESSABLE_ENTITY;
 PayloadTooLarge                       , InvalidRequest       , PAYLOAD_TOO_LARGE ;
 TaskNotFound                          , InvalidRequest       , NOT_FOUND ;
 TooManyOpenFiles                      , System               , UNPROCESSABLE_ENTITY ;
+TooManyVectors                        , InvalidRequest       , BAD_REQUEST ;
 UnretrievableDocument                 , Internal             , BAD_REQUEST ;
 UnretrievableErrorCode                , InvalidRequest       , BAD_REQUEST ;
-UnsupportedMediaType                  , InvalidRequest       , UNSUPPORTED_MEDIA_TYPE
+UnsupportedMediaType                  , InvalidRequest       , UNSUPPORTED_MEDIA_TYPE ;
+VectorEmbeddingError                  , InvalidRequest       , BAD_REQUEST
 }

 impl ErrorCode for JoinError {
@ -324,7 +332,6 @@ impl ErrorCode for milli::Error {
                    UserError::SerdeJson(_)
                    | UserError::InvalidLmdbOpenOptions
                    | UserError::DocumentLimitReached
-                    | UserError::AccessingSoftDeletedDocument { .. }
                    | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
                    UserError::InvalidStoreFile => Code::InvalidStoreFile,
                    UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
@ -336,6 +343,13 @@ impl ErrorCode for milli::Error {
                    UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
                        Code::InvalidDocumentId
                    }
+                    UserError::MissingDocumentField(_) => Code::InvalidDocumentFields,
+                    UserError::InvalidFieldForSource { .. }
+                    | UserError::MissingFieldForSource { .. }
+                    | UserError::InvalidOpenAiModel { .. }
+                    | UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
+                    UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
+                    UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
                    UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound,
                    UserError::MultiplePrimaryKeyCandidatesFound { .. } => {
                        Code::IndexPrimaryKeyMultipleCandidatesFound
@ -353,11 +367,15 @@ impl ErrorCode for milli::Error {
                    UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                    UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
                    UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
+                    UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
                    UserError::InvalidVectorsType { .. } => Code::InvalidVectorsType,
+                    UserError::TooManyVectors(_, _) => Code::TooManyVectors,
                    UserError::SortError(_) => Code::InvalidSearchSort,
                    UserError::InvalidMinTypoWordLenSetting(_, _) => {
                        Code::InvalidSettingsTypoTolerance
                    }
+                    UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
+                    UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
                }
            }
        }
@ -387,11 +405,11 @@ impl ErrorCode for HeedError {
            HeedError::Mdb(MdbError::Invalid) => Code::InvalidStoreFile,
            HeedError::Io(e) => e.error_code(),
            HeedError::Mdb(_)
-            | HeedError::Encoding
-            | HeedError::Decoding
+            | HeedError::Encoding(_)
+            | HeedError::Decoding(_)
            | HeedError::InvalidDatabaseTyping
            | HeedError::DatabaseClosing
-            | HeedError::BadOpenOptions => Code::Internal,
+            | HeedError::BadOpenOptions { .. } => Code::Internal,
        }
    }
 }
@ -445,6 +463,15 @@ impl fmt::Display for DeserrParseIntError {
    }
 }

+impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`."
+        )
+    }
+}
+
 #[macro_export]
 macro_rules! internal_error {
    ($target:ty : $($other:path), *) => {
--- a/meilisearch-types/src/features.rs
+++ b/meilisearch-types/src/features.rs
@ -5,6 +5,8 @@ use serde::{Deserialize, Serialize};
 pub struct RuntimeTogglableFeatures {
    pub score_details: bool,
    pub vector_store: bool,
+    pub metrics: bool,
+    pub export_puffin_reports: bool,
 }

 #[derive(Default, Debug, Clone, Copy)]
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@ -9,6 +9,7 @@ pub mod index_uid_pattern;
 pub mod keys;
 pub mod settings;
 pub mod star_or;
+pub mod task_view;
 pub mod tasks;
 pub mod versioning;
 pub use milli::{heed, Index};
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@ -8,6 +8,7 @@ use std::str::FromStr;

 use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
 use fst::IntoStreamer;
+use milli::proximity::ProximityPrecision;
 use milli::update::Setting;
 use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};
@ -186,6 +187,9 @@ pub struct Settings<T> {
    #[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
    pub distinct_attribute: Setting<String>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
+    pub proximity_precision: Setting<ProximityPrecisionView>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
    pub typo_tolerance: Setting<TypoSettings>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -195,6 +199,10 @@ pub struct Settings<T> {
    #[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
    pub pagination: Setting<PaginationSettings>,

+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
+    pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
+
    #[serde(skip)]
    #[deserr(skip)]
    pub _kind: PhantomData<T>,
@ -214,9 +222,11 @@ impl Settings<Checked> {
            separator_tokens: Setting::Reset,
            dictionary: Setting::Reset,
            distinct_attribute: Setting::Reset,
+            proximity_precision: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
            pagination: Setting::Reset,
+            embedders: Setting::Reset,
            _kind: PhantomData,
        }
    }
@ -234,9 +244,11 @@ impl Settings<Checked> {
            dictionary,
            synonyms,
            distinct_attribute,
+            proximity_precision,
            typo_tolerance,
            faceting,
            pagination,
+            embedders,
            ..
        } = self;

@ -252,9 +264,11 @@ impl Settings<Checked> {
            dictionary,
            synonyms,
            distinct_attribute,
+            proximity_precision,
            typo_tolerance,
            faceting,
            pagination,
+            embedders,
            _kind: PhantomData,
        }
    }
@ -296,12 +310,29 @@ impl Settings<Unchecked> {
            separator_tokens: self.separator_tokens,
            dictionary: self.dictionary,
            distinct_attribute: self.distinct_attribute,
+            proximity_precision: self.proximity_precision,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
            pagination: self.pagination,
+            embedders: self.embedders,
            _kind: PhantomData,
        }
    }
+
+    pub fn validate(self) -> Result<Self, milli::Error> {
+        self.validate_embedding_settings()
+    }
+
+    fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
+        let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
+        for (name, config) in configs.iter_mut() {
+            let config_to_check = std::mem::take(config);
+            let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
+            *config = checked_config
+        }
+        self.embedders = Setting::Set(configs);
+        Ok(self)
+    }
 }

 #[derive(Debug, Clone, Serialize, Deserialize)]
@ -390,6 +421,12 @@ pub fn apply_settings_to_builder(
        Setting::NotSet => (),
    }

+    match settings.proximity_precision {
+        Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
+        Setting::Reset => builder.reset_proximity_precision(),
+        Setting::NotSet => (),
+    }
+
    match settings.typo_tolerance {
        Setting::Set(ref value) => {
            match value.enabled {
@ -476,6 +513,12 @@ pub fn apply_settings_to_builder(
        Setting::Reset => builder.reset_pagination_max_total_hits(),
        Setting::NotSet => (),
    }
+
+    match settings.embedders.clone() {
+        Setting::Set(value) => builder.set_embedder_settings(value),
+        Setting::Reset => builder.reset_embedder_settings(),
+        Setting::NotSet => (),
+    }
 }

 pub fn settings(
@ -509,6 +552,8 @@ pub fn settings(

    let distinct_field = index.distinct_field(rtxn)?.map(String::from);

+    let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
+
    let synonyms = index.user_defined_synonyms(rtxn)?;

    let min_typo_word_len = MinWordSizeTyposSetting {
@ -532,7 +577,10 @@ pub fn settings(

    let faceting = FacetingSettings {
        max_values_per_facet: Setting::Set(
-            index.max_values_per_facet(rtxn)?.unwrap_or(DEFAULT_VALUES_PER_FACET),
+            index
+                .max_values_per_facet(rtxn)?
+                .map(|x| x as usize)
+                .unwrap_or(DEFAULT_VALUES_PER_FACET),
        ),
        sort_facet_values_by: Setting::Set(
            index
@ -545,10 +593,20 @@ pub fn settings(

    let pagination = PaginationSettings {
        max_total_hits: Setting::Set(
-            index.pagination_max_total_hits(rtxn)?.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
+            index
+                .pagination_max_total_hits(rtxn)?
+                .map(|x| x as usize)
+                .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
        ),
    };

+    let embedders: BTreeMap<_, _> = index
+        .embedding_configs(rtxn)?
+        .into_iter()
+        .map(|(name, config)| (name, Setting::Set(config.into())))
+        .collect();
+    let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
+
    Ok(Settings {
        displayed_attributes: match displayed_attributes {
            Some(attrs) => Setting::Set(attrs),
@ -569,10 +627,12 @@ pub fn settings(
            Some(field) => Setting::Set(field),
            None => Setting::Reset,
        },
+        proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
        synonyms: Setting::Set(synonyms),
        typo_tolerance: Setting::Set(typo_tolerance),
        faceting: Setting::Set(faceting),
        pagination: Setting::Set(pagination),
+        embedders,
        _kind: PhantomData,
    })
 }
@ -673,6 +733,32 @@ impl From<RankingRuleView> for Criterion {
    }
 }

+#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
+#[serde(deny_unknown_fields, rename_all = "camelCase")]
+#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
+pub enum ProximityPrecisionView {
+    #[default]
+    ByWord,
+    ByAttribute,
+}
+
+impl From<ProximityPrecision> for ProximityPrecisionView {
+    fn from(value: ProximityPrecision) -> Self {
+        match value {
+            ProximityPrecision::ByWord => ProximityPrecisionView::ByWord,
+            ProximityPrecision::ByAttribute => ProximityPrecisionView::ByAttribute,
+        }
+    }
+}
+impl From<ProximityPrecisionView> for ProximityPrecision {
+    fn from(value: ProximityPrecisionView) -> Self {
+        match value {
+            ProximityPrecisionView::ByWord => ProximityPrecision::ByWord,
+            ProximityPrecisionView::ByAttribute => ProximityPrecision::ByAttribute,
+        }
+    }
+}
+
 #[cfg(test)]
 pub(crate) mod test {
    use super::*;
@ -692,9 +778,11 @@ pub(crate) mod test {
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
+            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
+            embedders: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

@ -716,9 +804,11 @@ pub(crate) mod test {
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
+            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
+            embedders: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

--- a/meilisearch-types/src/task_view.rs
+++ b/meilisearch-types/src/task_view.rs
@ -0,0 +1,139 @@
+use serde::Serialize;
+use time::{Duration, OffsetDateTime};
+
+use crate::error::ResponseError;
+use crate::settings::{Settings, Unchecked};
+use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct TaskView {
+    pub uid: TaskId,
+    #[serde(default)]
+    pub index_uid: Option<String>,
+    pub status: Status,
+    #[serde(rename = "type")]
+    pub kind: Kind,
+    pub canceled_by: Option<TaskId>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub details: Option<DetailsView>,
+    pub error: Option<ResponseError>,
+    #[serde(serialize_with = "serialize_duration", default)]
+    pub duration: Option<Duration>,
+    #[serde(with = "time::serde::rfc3339")]
+    pub enqueued_at: OffsetDateTime,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub started_at: Option<OffsetDateTime>,
+    #[serde(with = "time::serde::rfc3339::option", default)]
+    pub finished_at: Option<OffsetDateTime>,
+}
+
+impl TaskView {
+    pub fn from_task(task: &Task) -> TaskView {
+        TaskView {
+            uid: task.uid,
+            index_uid: task.index_uid().map(ToOwned::to_owned),
+            status: task.status,
+            kind: task.kind.as_kind(),
+            canceled_by: task.canceled_by,
+            details: task.details.clone().map(DetailsView::from),
+            error: task.error.clone(),
+            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+        }
+    }
+}
+
+#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub struct DetailsView {
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub received_documents: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub indexed_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub primary_key: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub provided_ids: Option<usize>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_documents: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_tasks: Option<u64>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub canceled_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub deleted_tasks: Option<Option<u64>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub original_filter: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub dump_uid: Option<Option<String>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(flatten)]
+    pub settings: Option<Box<Settings<Unchecked>>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub swaps: Option<Vec<IndexSwap>>,
+}
+
+impl From<Details> for DetailsView {
+    fn from(details: Details) -> Self {
+        match details {
+            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
+                DetailsView {
+                    received_documents: Some(received_documents),
+                    indexed_documents: Some(indexed_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::SettingsUpdate { settings } => {
+                DetailsView { settings: Some(settings), ..DetailsView::default() }
+            }
+            Details::IndexInfo { primary_key } => {
+                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
+            }
+            Details::DocumentDeletion {
+                provided_ids: received_document_ids,
+                deleted_documents,
+            } => DetailsView {
+                provided_ids: Some(received_document_ids),
+                deleted_documents: Some(deleted_documents),
+                original_filter: Some(None),
+                ..DetailsView::default()
+            },
+            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
+                DetailsView {
+                    provided_ids: Some(0),
+                    original_filter: Some(Some(original_filter)),
+                    deleted_documents: Some(deleted_documents),
+                    ..DetailsView::default()
+                }
+            }
+            Details::ClearAll { deleted_documents } => {
+                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
+            }
+            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    canceled_tasks: Some(canceled_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
+                DetailsView {
+                    matched_tasks: Some(matched_tasks),
+                    deleted_tasks: Some(deleted_tasks),
+                    original_filter: Some(Some(original_filter)),
+                    ..DetailsView::default()
+                }
+            }
+            Details::Dump { dump_uid } => {
+                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
+            }
+            Details::IndexSwap { swaps } => {
+                DetailsView { swaps: Some(swaps), ..Default::default() }
+            }
+        }
+    }
+}
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
 bytes = "1.4.0"
 clap = { version = "4.2.1", features = ["derive", "env"] }
 crossbeam-channel = "0.5.8"
-deserr = { version = "0.6.0", features = ["actix-web"]}
+deserr = { version = "0.6.0", features = ["actix-web"] }
 dump = { path = "../dump" }
 either = "1.8.1"
 env_logger = "0.10.0"
@ -69,8 +69,7 @@ permissive-json-pointer = { path = "../permissive-json-pointer" }
 pin-project-lite = "0.2.9"
 platform-dirs = "0.3.0"
 prometheus = { version = "0.13.3", features = ["process"] }
-puffin = "0.16.0"
-puffin_http = { version = "0.13.0", optional = true }
+puffin = { version = "0.16.0", features = ["serialization"] }
 rand = "0.8.5"
 rayon = "1.7.0"
 regex = "1.7.3"
@ -105,6 +104,7 @@ walkdir = "2.3.3"
 yaup = "0.2.1"
 serde_urlencoded = "0.7.1"
 termcolor = "1.2.0"
+url = { version = "2.5.0", features = ["serde"] }

 [dev-dependencies]
 actix-rt = "2.8.0"
@ -135,7 +135,6 @@ zip = { version = "0.6.4", optional = true }
 [features]
 default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
 analytics = ["segment"]
-profile-with-puffin = ["dep:puffin_http"]
 mini-dashboard = [
    "actix-web-static-files",
    "static-files",
@ -152,7 +151,8 @@ hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
+khmer = ["meilisearch-types/khmer"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"
-sha1 = "83cd44ed1e5f97ecb581dc9f958a63f4ccc982d9"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
+sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@ -36,7 +36,7 @@ use crate::routes::{create_all_stats, Stats};
 use crate::search::{
    FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult,
    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
-    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::Opt;

@ -251,6 +251,7 @@ struct Infos {
    env: String,
    experimental_enable_metrics: bool,
    experimental_reduce_indexing_memory_usage: bool,
+    experimental_max_number_of_batched_tasks: usize,
    db_path: bool,
    import_dump: bool,
    dump_dir: bool,
@ -263,6 +264,8 @@ struct Infos {
    ignore_snapshot_if_db_exists: bool,
    http_addr: bool,
    http_payload_size_limit: Byte,
+    task_queue_webhook: bool,
+    task_webhook_authorization_header: bool,
    log_level: String,
    max_indexing_memory: MaxMemory,
    max_indexing_threads: MaxThreads,
@ -285,9 +288,12 @@ impl From<Opt> for Infos {
            db_path,
            experimental_enable_metrics,
            experimental_reduce_indexing_memory_usage,
+            experimental_max_number_of_batched_tasks,
            http_addr,
            master_key: _,
            env,
+            task_webhook_url,
+            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
@ -340,6 +346,9 @@ impl From<Opt> for Infos {
            ignore_snapshot_if_db_exists,
            http_addr: http_addr != default_http_addr(),
            http_payload_size_limit,
+            experimental_max_number_of_batched_tasks,
+            task_queue_webhook: task_webhook_url.is_some(),
+            task_webhook_authorization_header: task_webhook_authorization_header.is_some(),
            log_level: log_level.to_string(),
            max_indexing_memory,
            max_indexing_threads,
@ -583,6 +592,11 @@ pub struct SearchAggregator {
    // vector
    // The maximum number of floats in a vector request
    max_vector_size: usize,
+    // Whether the semantic ratio passed to a hybrid search differs from the default ratio.
+    semantic_ratio: bool,
+    // Whether a non-default embedder was specified
+    embedder: bool,
+    hybrid: bool,

    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,
@ -636,6 +650,7 @@ impl SearchAggregator {
            crop_marker,
            matching_strategy,
            attributes_to_search_on,
+            hybrid,
        } = query;

        let mut ret = Self::default();
@ -709,6 +724,12 @@ impl SearchAggregator {
        ret.show_ranking_score = *show_ranking_score;
        ret.show_ranking_score_details = *show_ranking_score_details;

+        if let Some(hybrid) = hybrid {
+            ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
+            ret.embedder = hybrid.embedder.is_some();
+            ret.hybrid = true;
+        }
+
        ret
    }

@ -762,6 +783,9 @@ impl SearchAggregator {
            facets_total_number_of_facets,
            show_ranking_score,
            show_ranking_score_details,
+            semantic_ratio,
+            embedder,
+            hybrid,
        } = other;

        if self.timestamp.is_none() {
@ -807,6 +831,9 @@ impl SearchAggregator {

        // vector
        self.max_vector_size = self.max_vector_size.max(max_vector_size);
+        self.semantic_ratio |= semantic_ratio;
+        self.hybrid |= hybrid;
+        self.embedder |= embedder;

        // pagination
        self.max_limit = self.max_limit.max(max_limit);
@ -875,6 +902,9 @@ impl SearchAggregator {
            facets_total_number_of_facets,
            show_ranking_score,
            show_ranking_score_details,
+            semantic_ratio,
+            embedder,
+            hybrid,
        } = self;

        if total_received == 0 {
@ -914,6 +944,11 @@ impl SearchAggregator {
                "vector": {
                    "max_vector_size": max_vector_size,
                },
+                "hybrid": {
+                    "enabled": hybrid,
+                    "semantic_ratio": semantic_ratio,
+                    "embedder": embedder,
+                },
                "pagination": {
                   "max_limit": max_limit,
                   "max_offset": max_offset,
@ -1009,6 +1044,7 @@ impl MultiSearchAggregator {
                    crop_marker: _,
                    matching_strategy: _,
                    attributes_to_search_on: _,
+                    hybrid: _,
                } = query;

                index_uid.as_str()
@ -1155,6 +1191,7 @@ impl FacetSearchAggregator {
            filter,
            matching_strategy,
            attributes_to_search_on,
+            hybrid,
        } = query;

        let mut ret = Self::default();
@ -1168,7 +1205,8 @@ impl FacetSearchAggregator {
            || vector.is_some()
            || filter.is_some()
            || *matching_strategy != MatchingStrategy::default()
-            || attributes_to_search_on.is_some();
+            || attributes_to_search_on.is_some()
+            || hybrid.is_some();

        ret
    }
--- a/meilisearch/src/error.rs
+++ b/meilisearch/src/error.rs
@ -51,6 +51,8 @@ pub enum MeilisearchHttpError {
    DocumentFormat(#[from] DocumentFormatError),
    #[error(transparent)]
    Join(#[from] JoinError),
+    #[error("Invalid request: missing `hybrid` parameter when both `q` and `vector` are present.")]
+    MissingSearchHybrid,
 }

 impl ErrorCode for MeilisearchHttpError {
@ -74,6 +76,7 @@ impl ErrorCode for MeilisearchHttpError {
            MeilisearchHttpError::FileStore(_) => Code::Internal,
            MeilisearchHttpError::DocumentFormat(e) => e.error_code(),
            MeilisearchHttpError::Join(_) => Code::Internal,
+            MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid,
        }
    }
 }
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@ -114,10 +114,7 @@ pub fn create_app(
        .configure(routes::configure)
        .configure(|s| dashboard(s, enable_dashboard));

-    let app = app.wrap(actix_web::middleware::Condition::new(
-        opt.experimental_enable_metrics,
-        middleware::RouteMetrics,
-    ));
+    let app = app.wrap(middleware::RouteMetrics);
    app.wrap(
        Cors::default()
            .send_wildcard()
@ -231,12 +228,15 @@ fn open_or_create_database_unchecked(
            indexes_path: opt.db_path.join("indexes"),
            snapshots_path: opt.snapshot_dir.clone(),
            dumps_path: opt.dump_dir.clone(),
+            webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
+            webhook_authorization_header: opt.task_webhook_authorization_header.clone(),
            task_db_size: opt.max_task_db_size.get_bytes() as usize,
            index_base_map_size: opt.max_index_size.get_bytes() as usize,
            enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
            indexer_config: (&opt.indexer_options).try_into()?,
            autobatching_enabled: true,
            max_number_of_tasks: 1_000_000,
+            max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
            index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
            index_count: DEFAULT_INDEX_COUNT,
            instance_features,
@ -365,7 +365,7 @@ fn import_dump(
                update_method: IndexDocumentsMethod::ReplaceDocuments,
                ..Default::default()
            },
-            |indexing_step| log::debug!("update: {:?}", indexing_step),
+            |indexing_step| log::trace!("update: {:?}", indexing_step),
            || false,
        )?;

@ -400,6 +400,7 @@ pub fn configure_data(
        .app_data(web::Data::from(analytics))
        .app_data(
            web::JsonConfig::default()
+                .limit(http_payload_size_limit)
                .content_type(|mime| mime == mime::APPLICATION_JSON)
                .error_handler(|err, req: &HttpRequest| match err {
                    JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@ -19,7 +19,11 @@ static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
 /// does all the setup before meilisearch is launched
 fn setup(opt: &Opt) -> anyhow::Result<()> {
    let mut log_builder = env_logger::Builder::new();
-    log_builder.parse_filters(&opt.log_level.to_string());
+    let log_filters = format!(
+        "{},h2=warn,hyper=warn,tokio_util=warn,tracing=warn,rustls=warn,mio=warn,reqwest=warn",
+        opt.log_level
+    );
+    log_builder.parse_filters(&log_filters);

    log_builder.init();

@ -30,10 +34,6 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 async fn main() -> anyhow::Result<()> {
    let (opt, config_read_from) = Opt::try_build()?;

-    #[cfg(feature = "profile-with-puffin")]
-    let _server = puffin_http::Server::new(&format!("0.0.0.0:{}", puffin_http::DEFAULT_PORT))?;
-    puffin::set_scopes_on(cfg!(feature = "profile-with-puffin"));
-
    anyhow::ensure!(
        !(cfg!(windows) && opt.experimental_reduce_indexing_memory_usage),
        "The `experimental-reduce-indexing-memory-usage` flag is not supported on Windows"
--- a/meilisearch/src/middleware.rs
+++ b/meilisearch/src/middleware.rs
@ -3,8 +3,10 @@
 use std::future::{ready, Ready};

 use actix_web::dev::{self, Service, ServiceRequest, ServiceResponse, Transform};
+use actix_web::web::Data;
 use actix_web::Error;
 use futures_util::future::LocalBoxFuture;
+use index_scheduler::IndexScheduler;
 use prometheus::HistogramTimer;

 pub struct RouteMetrics;
@ -47,19 +49,27 @@ where

    fn call(&self, req: ServiceRequest) -> Self::Future {
        let mut histogram_timer: Option<HistogramTimer> = None;
-        let request_path = req.path();
-        let is_registered_resource = req.resource_map().has_resource(request_path);
-        if is_registered_resource {
-            let request_method = req.method().to_string();
-            histogram_timer = Some(
-                crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+
+        // calling unwrap here is safe because index scheduler is added to app data while creating actix app.
+        // also, the tests will fail if this is not present.
+        let index_scheduler = req.app_data::<Data<IndexScheduler>>().unwrap();
+        let features = index_scheduler.features();
+
+        if features.check_metrics().is_ok() {
+            let request_path = req.path();
+            let is_registered_resource = req.resource_map().has_resource(request_path);
+            if is_registered_resource {
+                let request_method = req.method().to_string();
+                histogram_timer = Some(
+                    crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
+                        .with_label_values(&[&request_method, request_path])
+                        .start_timer(),
+                );
+                crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
                    .with_label_values(&[&request_method, request_path])
-                    .start_timer(),
-            );
-            crate::metrics::MEILISEARCH_HTTP_REQUESTS_TOTAL
-                .with_label_values(&[&request_method, request_path])
-                .inc();
-        }
+                    .inc();
+            }
+        };

        let fut = self.service.call(req);

--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@ -21,6 +21,7 @@ use rustls::RootCertStore;
 use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
 use serde::{Deserialize, Serialize};
 use sysinfo::{RefreshKind, System, SystemExt};
+use url::Url;

 const POSSIBLE_ENV: [&str; 2] = ["development", "production"];

@ -28,6 +29,8 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
 const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
 const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
 const MEILI_ENV: &str = "MEILI_ENV";
+const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
+const MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER: &str = "MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER";
 #[cfg(feature = "analytics")]
 const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
 const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@ -51,6 +54,8 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
    "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
+const MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS: &str =
+    "MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS";

 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
@ -154,6 +159,14 @@ pub struct Opt {
    #[serde(default = "default_env")]
    pub env: String,

+    /// Called whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
+    pub task_webhook_url: Option<Url>,
+
+    /// The Authorization header to send on the webhook URL whenever a task finishes so a third party can be notified.
+    #[clap(long, env = MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER)]
+    pub task_webhook_authorization_header: Option<String>,
+
    /// Deactivates Meilisearch's built-in telemetry when provided.
    ///
    /// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@ -301,6 +314,11 @@ pub struct Opt {
    #[serde(default)]
    pub experimental_reduce_indexing_memory_usage: bool,

+    /// Experimentally reduces the maximum number of tasks that will be processed at once, see: <https://github.com/orgs/meilisearch/discussions/713>
+    #[clap(long, env = MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
+    #[serde(default = "default_limit_batched_tasks")]
+    pub experimental_max_number_of_batched_tasks: usize,
+
    #[serde(flatten)]
    #[clap(flatten)]
    pub indexer_options: IndexerOpts,
@ -368,9 +386,12 @@ impl Opt {
            http_addr,
            master_key,
            env,
+            task_webhook_url,
+            task_webhook_authorization_header,
            max_index_size: _,
            max_task_db_size: _,
            http_payload_size_limit,
+            experimental_max_number_of_batched_tasks,
            ssl_cert_path,
            ssl_key_path,
            ssl_auth_path,
@ -392,8 +413,8 @@ impl Opt {
            config_file_path: _,
            #[cfg(feature = "analytics")]
            no_analytics,
-            experimental_enable_metrics: enable_metrics_route,
-            experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
+            experimental_enable_metrics,
+            experimental_reduce_indexing_memory_usage,
        } = self;
        export_to_env_if_not_present(MEILI_DB_PATH, db_path);
        export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -401,6 +422,16 @@ impl Opt {
            export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
        }
        export_to_env_if_not_present(MEILI_ENV, env);
+        if let Some(task_webhook_url) = task_webhook_url {
+            export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
+        }
+        if let Some(task_webhook_authorization_header) = task_webhook_authorization_header {
+            export_to_env_if_not_present(
+                MEILI_TASK_WEBHOOK_AUTHORIZATION_HEADER,
+                task_webhook_authorization_header,
+            );
+        }
+
        #[cfg(feature = "analytics")]
        {
            export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
@ -409,6 +440,10 @@ impl Opt {
            MEILI_HTTP_PAYLOAD_SIZE_LIMIT,
            http_payload_size_limit.to_string(),
        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_MAX_NUMBER_OF_BATCHED_TASKS,
+            experimental_max_number_of_batched_tasks.to_string(),
+        );
        if let Some(ssl_cert_path) = ssl_cert_path {
            export_to_env_if_not_present(MEILI_SSL_CERT_PATH, ssl_cert_path);
        }
@ -433,11 +468,11 @@ impl Opt {
        export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
        export_to_env_if_not_present(
            MEILI_EXPERIMENTAL_ENABLE_METRICS,
-            enable_metrics_route.to_string(),
+            experimental_enable_metrics.to_string(),
        );
        export_to_env_if_not_present(
            MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
-            reduce_indexing_memory_usage.to_string(),
+            experimental_reduce_indexing_memory_usage.to_string(),
        );
        indexer_options.export_to_env();
    }
@ -727,6 +762,10 @@ fn default_http_payload_size_limit() -> Byte {
    Byte::from_str(DEFAULT_HTTP_PAYLOAD_SIZE_LIMIT).unwrap()
 }

+fn default_limit_batched_tasks() -> usize {
+    usize::MAX
+}
+
 fn default_snapshot_dir() -> PathBuf {
    PathBuf::from(DEFAULT_SNAPSHOT_DIR)
 }
--- a/meilisearch/src/routes/features.rs
+++ b/meilisearch/src/routes/features.rs
@ -29,12 +29,12 @@ async fn get_features(
    >,
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
-) -> Result<HttpResponse, ResponseError> {
-    let features = index_scheduler.features()?;
+) -> HttpResponse {
+    let features = index_scheduler.features();

    analytics.publish("Experimental features Seen".to_string(), json!(null), Some(&req));
    debug!("returns: {:?}", features.runtime_features());
-    Ok(HttpResponse::Ok().json(features.runtime_features()))
+    HttpResponse::Ok().json(features.runtime_features())
 }

 #[derive(Debug, Deserr)]
@ -44,6 +44,10 @@ pub struct RuntimeTogglableFeatures {
    pub score_details: Option<bool>,
    #[deserr(default)]
    pub vector_store: Option<bool>,
+    #[deserr(default)]
+    pub metrics: Option<bool>,
+    #[deserr(default)]
+    pub export_puffin_reports: Option<bool>,
 }

 async fn patch_features(
@ -55,26 +59,36 @@ async fn patch_features(
    req: HttpRequest,
    analytics: Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();

    let old_features = features.runtime_features();
-
    let new_features = meilisearch_types::features::RuntimeTogglableFeatures {
        score_details: new_features.0.score_details.unwrap_or(old_features.score_details),
        vector_store: new_features.0.vector_store.unwrap_or(old_features.vector_store),
+        metrics: new_features.0.metrics.unwrap_or(old_features.metrics),
+        export_puffin_reports: new_features
+            .0
+            .export_puffin_reports
+            .unwrap_or(old_features.export_puffin_reports),
    };

    // explicitly destructure for analytics rather than using the `Serialize` implementation, because
    // it renames to camelCase, which we don't want for analytics.
    // **Do not** ignore fields with `..` or `_` here, because we want to add them in the future.
-    let meilisearch_types::features::RuntimeTogglableFeatures { score_details, vector_store } =
-        new_features;
+    let meilisearch_types::features::RuntimeTogglableFeatures {
+        score_details,
+        vector_store,
+        metrics,
+        export_puffin_reports,
+    } = new_features;

    analytics.publish(
        "Experimental features Updated".to_string(),
        json!({
            "score_details": score_details,
            "vector_store": vector_store,
+            "metrics": metrics,
+            "export_puffin_reports": export_puffin_reports,
        }),
        Some(&req),
    );
--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@ -3,7 +3,7 @@ use std::io::ErrorKind;
 use actix_web::http::header::CONTENT_TYPE;
 use actix_web::web::Data;
 use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
-use bstr::ByteSlice;
+use bstr::ByteSlice as _;
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use deserr::Deserr;
 use futures::StreamExt;
@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let internal_id = index
-        .external_documents_ids(&txn)?
-        .get(doc_id.as_bytes())
+        .external_documents_ids()
+        .get(&txn, doc_id)?
        .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;

    let document = index
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@ -13,9 +13,9 @@ use crate::analytics::{Analytics, FacetSearchAggregator};
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::search::{
-    add_search_rules, perform_facet_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
-    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
-    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
+    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
 };

 pub fn configure(cfg: &mut web::ServiceConfig) {
@ -36,6 +36,8 @@ pub struct FacetSearchQuery {
    pub q: Option<String>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
    pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
+    pub hybrid: Option<HybridQuery>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
    pub filter: Option<Value>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
@ -68,7 +70,7 @@ pub async fn search(
    }

    let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();
    let search_result = tokio::task::spawn_blocking(move || {
        perform_facet_search(&index, search_query, facet_query, facet_name, features)
    })
@ -95,6 +97,7 @@ impl From<FacetSearchQuery> for SearchQuery {
            filter,
            matching_strategy,
            attributes_to_search_on,
+            hybrid,
        } = value;

        SearchQuery {
@ -119,6 +122,7 @@ impl From<FacetSearchQuery> for SearchQuery {
            matching_strategy,
            vector,
            attributes_to_search_on,
+            hybrid,
        }
    }
 }
--- a/meilisearch/src/routes/indexes/search.rs
+++ b/meilisearch/src/routes/indexes/search.rs
@ -2,12 +2,14 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use index_scheduler::IndexScheduler;
-use log::debug;
+use log::{debug, warn};
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli;
+use meilisearch_types::milli::vector::DistributionShift;
 use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;

@ -16,9 +18,9 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::search::{
-    add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
-    DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
-    DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio,
+    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
+    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };

 pub fn configure(cfg: &mut web::ServiceConfig) {
@ -74,6 +76,31 @@ pub struct SearchQueryGet {
    matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchAttributesToSearchOn>)]
    pub attributes_to_search_on: Option<CS<String>>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
+    pub hybrid_embedder: Option<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
+    pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
+}
+
+#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
+#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
+pub struct SemanticRatioGet(SemanticRatio);
+
+impl std::convert::TryFrom<String> for SemanticRatioGet {
+    type Error = InvalidSearchSemanticRatio;
+
+    fn try_from(s: String) -> Result<Self, Self::Error> {
+        let f: f32 = s.parse().map_err(|_| InvalidSearchSemanticRatio)?;
+        Ok(SemanticRatioGet(SemanticRatio::try_from(f)?))
+    }
+}
+
+impl std::ops::Deref for SemanticRatioGet {
+    type Target = SemanticRatio;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
 }

 impl From<SearchQueryGet> for SearchQuery {
@ -86,6 +113,20 @@ impl From<SearchQueryGet> for SearchQuery {
            None => None,
        };

+        let hybrid = match (other.hybrid_embedder, other.hybrid_semantic_ratio) {
+            (None, None) => None,
+            (None, Some(semantic_ratio)) => {
+                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: None })
+            }
+            (Some(embedder), None) => Some(HybridQuery {
+                semantic_ratio: DEFAULT_SEMANTIC_RATIO(),
+                embedder: Some(embedder),
+            }),
+            (Some(embedder), Some(semantic_ratio)) => {
+                Some(HybridQuery { semantic_ratio: *semantic_ratio, embedder: Some(embedder) })
+            }
+        };
+
        Self {
            q: other.q,
            vector: other.vector.map(CS::into_inner),
@ -108,6 +149,7 @@ impl From<SearchQueryGet> for SearchQuery {
            crop_marker: other.crop_marker,
            matching_strategy: other.matching_strategy,
            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
+            hybrid,
        }
    }
 }
@ -157,9 +199,13 @@ pub async fn search_with_url_query(
    let mut aggregate = SearchAggregator::from_query(&query, &req);

    let index = index_scheduler.index(&index_uid)?;
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();
+
+    let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
+
    let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
+        tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
+            .await?;
    if let Ok(ref search_result) = search_result {
        aggregate.succeed(search_result);
    }
@ -192,9 +238,13 @@ pub async fn search_with_post(

    let index = index_scheduler.index(&index_uid)?;

-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();
+
+    let distribution = embed(&mut query, index_scheduler.get_ref(), &index).await?;
+
    let search_result =
-        tokio::task::spawn_blocking(move || perform_search(&index, query, features)).await?;
+        tokio::task::spawn_blocking(move || perform_search(&index, query, features, distribution))
+            .await?;
    if let Ok(ref search_result) = search_result {
        aggregate.succeed(search_result);
    }
@ -206,6 +256,80 @@ pub async fn search_with_post(
    Ok(HttpResponse::Ok().json(search_result))
 }

+pub async fn embed(
+    query: &mut SearchQuery,
+    index_scheduler: &IndexScheduler,
+    index: &milli::Index,
+) -> Result<Option<DistributionShift>, ResponseError> {
+    match (&query.hybrid, &query.vector, &query.q) {
+        (Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
+            if !q.trim().is_empty() =>
+        {
+            let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
+            let embedders = index_scheduler.embedders(embedder_configs)?;
+
+            let embedder = if let Some(embedder_name) = embedder {
+                embedders.get(embedder_name)
+            } else {
+                embedders.get_default()
+            };
+
+            let embedder = embedder
+                .ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
+                .map_err(milli::Error::from)?
+                .0;
+
+            let distribution = embedder.distribution();
+
+            let embeddings = embedder
+                .embed(vec![q.to_owned()])
+                .await
+                .map_err(milli::vector::Error::from)
+                .map_err(milli::Error::from)?
+                .pop()
+                .expect("No vector returned from embedding");
+
+            if embeddings.iter().nth(1).is_some() {
+                warn!("Ignoring embeddings past the first one in long search query");
+                query.vector = Some(embeddings.iter().next().unwrap().to_vec());
+            } else {
+                query.vector = Some(embeddings.into_inner());
+            }
+            Ok(distribution)
+        }
+        (Some(hybrid), vector, _) => {
+            let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
+            let embedders = index_scheduler.embedders(embedder_configs)?;
+
+            let embedder = if let Some(embedder_name) = &hybrid.embedder {
+                embedders.get(embedder_name)
+            } else {
+                embedders.get_default()
+            };
+
+            let embedder = embedder
+                .ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
+                .map_err(milli::Error::from)?
+                .0;
+
+            if let Some(vector) = vector {
+                if vector.len() != embedder.dimensions() {
+                    return Err(meilisearch_types::milli::Error::UserError(
+                        meilisearch_types::milli::UserError::InvalidVectorDimensions {
+                            expected: embedder.dimensions(),
+                            found: vector.len(),
+                        },
+                    )
+                    .into());
+                }
+            }
+
+            Ok(embedder.distribution())
+        }
+        _ => Ok(None),
+    }
+}
+
 #[cfg(test)]
 mod test {
    use super::*;
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@ -7,6 +7,7 @@ use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::facet_values_sort::FacetValuesSort;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::milli::update::Setting;
 use meilisearch_types::settings::{settings, RankingRuleView, Settings, Unchecked};
 use meilisearch_types::tasks::KindWithContent;
 use serde_json::json;
@ -78,6 +79,7 @@ macro_rules! make_setting_route {

                let body = body.into_inner();

+                #[allow(clippy::redundant_closure_call)]
                $analytics(&body, &req);

                let new_settings = Settings {
@ -88,6 +90,11 @@ macro_rules! make_setting_route {
                    ..Default::default()
                };

+                let new_settings = $crate::routes::indexes::settings::validate_settings(
+                    new_settings,
+                    &index_scheduler,
+                )?;
+
                let allow_index_creation =
                    index_scheduler.filters().allow_index_creation(&index_uid);

@ -434,6 +441,31 @@ make_setting_route!(
    }
 );

+make_setting_route!(
+    "/proximity-precision",
+    put,
+    meilisearch_types::settings::ProximityPrecisionView,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsProximityPrecision,
+    >,
+    proximity_precision,
+    "proximityPrecision",
+    analytics,
+    |precision: &Option<meilisearch_types::settings::ProximityPrecisionView>, req: &HttpRequest| {
+        use serde_json::json;
+        analytics.publish(
+            "ProximityPrecision Updated".to_string(),
+            json!({
+                "proximity_precision": {
+                    "set": precision.is_some(),
+                    "value": precision.unwrap_or_default(),
+                }
+            }),
+            Some(req),
+        );
+    }
+);
+
 make_setting_route!(
    "/ranking-rules",
    put,
@ -520,6 +552,67 @@ make_setting_route!(
    }
 );

+make_setting_route!(
+    "/embedders",
+    patch,
+    std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsEmbedders,
+    >,
+    embedders,
+    "embedders",
+    analytics,
+    |setting: &Option<std::collections::BTreeMap<String, Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>>>, req: &HttpRequest| {
+
+
+        analytics.publish(
+            "Embedders Updated".to_string(),
+            serde_json::json!({"embedders": crate::routes::indexes::settings::embedder_analytics(setting.as_ref())}),
+            Some(req),
+        );
+    }
+);
+
+fn embedder_analytics(
+    setting: Option<
+        &std::collections::BTreeMap<
+            String,
+            Setting<meilisearch_types::milli::vector::settings::EmbeddingSettings>,
+        >,
+    >,
+) -> serde_json::Value {
+    let mut sources = std::collections::HashSet::new();
+
+    if let Some(s) = &setting {
+        for source in s
+            .values()
+            .filter_map(|config| config.clone().set())
+            .filter_map(|config| config.source.set())
+        {
+            use meilisearch_types::milli::vector::settings::EmbedderSource;
+            match source {
+                EmbedderSource::OpenAi => sources.insert("openAi"),
+                EmbedderSource::HuggingFace => sources.insert("huggingFace"),
+                EmbedderSource::UserProvided => sources.insert("userProvided"),
+            };
+        }
+    };
+
+    let document_template_used = setting.as_ref().map(|map| {
+        map.values()
+            .filter_map(|config| config.clone().set())
+            .any(|config| config.document_template.set().is_some())
+    });
+
+    json!(
+        {
+            "total": setting.as_ref().map(|s| s.len()),
+            "sources": sources,
+            "document_template_used": document_template_used,
+        }
+    )
+}
+
 macro_rules! generate_configure {
    ($($mod:ident),*) => {
        pub fn configure(cfg: &mut web::ServiceConfig) {
@ -540,6 +633,7 @@ generate_configure!(
    displayed_attributes,
    searchable_attributes,
    distinct_attribute,
+    proximity_precision,
    stop_words,
    separator_tokens,
    non_separator_tokens,
@ -548,7 +642,8 @@ generate_configure!(
    ranking_rules,
    typo_tolerance,
    pagination,
-    faceting
+    faceting,
+    embedders
 );

 pub async fn update_all(
@ -561,6 +656,7 @@ pub async fn update_all(
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

    let new_settings = body.into_inner();
+    let new_settings = validate_settings(new_settings, &index_scheduler)?;

    analytics.publish(
        "Settings Updated".to_string(),
@ -593,6 +689,10 @@ pub async fn update_all(
            "distinct_attribute": {
                "set": new_settings.distinct_attribute.as_ref().set().is_some()
            },
+            "proximity_precision": {
+                "set": new_settings.proximity_precision.as_ref().set().is_some(),
+                "value": new_settings.proximity_precision.as_ref().set().copied().unwrap_or_default()
+            },
            "typo_tolerance": {
                "enabled": new_settings.typo_tolerance
                    .as_ref()
@ -652,6 +752,7 @@ pub async fn update_all(
            "synonyms": {
                "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()),
            },
+            "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set())
        }),
        Some(&req),
    );
@ -706,3 +807,13 @@ pub async fn delete_all(
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
 }
+
+fn validate_settings(
+    settings: Settings<Unchecked>,
+    index_scheduler: &IndexScheduler,
+) -> Result<Settings<Unchecked>, ResponseError> {
+    if matches!(settings.embedders, Setting::Set(_)) {
+        index_scheduler.features().check_vector("Passing `embedders` in settings")?
+    }
+    Ok(settings.validate()?)
+}
--- a/meilisearch/src/routes/metrics.rs
+++ b/meilisearch/src/routes/metrics.rs
@ -19,7 +19,7 @@ pub async fn get_metrics(
    index_scheduler: GuardedData<ActionPolicy<{ actions::METRICS_GET }>, Data<IndexScheduler>>,
    auth_controller: Data<AuthController>,
 ) -> Result<HttpResponse, ResponseError> {
-    index_scheduler.features()?.check_metrics()?;
+    index_scheduler.features().check_metrics()?;
    let auth_filters = index_scheduler.filters();
    if !auth_filters.all_indexes_authorized() {
        let mut error = ResponseError::from(AuthenticationError::InvalidToken);
--- a/meilisearch/src/routes/multi_search.rs
+++ b/meilisearch/src/routes/multi_search.rs
@ -13,6 +13,7 @@ use crate::analytics::{Analytics, MultiSearchAggregator};
 use crate::extractors::authentication::policies::ActionPolicy;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::indexes::search::embed;
 use crate::search::{
    add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
 };
@ -41,54 +42,56 @@ pub async fn multi_search_with_post(
    let queries = params.into_inner().queries;

    let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
-    let features = index_scheduler.features()?;
+    let features = index_scheduler.features();

    // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only,
    // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code
    // changes.
-    let search_results: Result<_, (ResponseError, usize)> = (|| {
-        async {
-            let mut search_results = Vec::with_capacity(queries.len());
-            for (query_index, (index_uid, mut query)) in
-                queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
-            {
-                debug!("multi-search #{query_index}: called with params: {:?}", query);
+    let search_results: Result<_, (ResponseError, usize)> = async {
+        let mut search_results = Vec::with_capacity(queries.len());
+        for (query_index, (index_uid, mut query)) in
+            queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate()
+        {
+            debug!("multi-search #{query_index}: called with params: {:?}", query);

-                // Check index from API key
-                if !index_scheduler.filters().is_index_authorized(&index_uid) {
-                    return Err(AuthenticationError::InvalidToken).with_index(query_index);
-                }
-                // Apply search rules from tenant token
-                if let Some(search_rules) =
-                    index_scheduler.filters().get_index_search_rules(&index_uid)
-                {
-                    add_search_rules(&mut query, search_rules);
-                }
-
-                let index = index_scheduler
-                    .index(&index_uid)
-                    .map_err(|err| {
-                        let mut err = ResponseError::from(err);
-                        // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
-                        // here the resource not found is not part of the URL.
-                        err.code = StatusCode::BAD_REQUEST;
-                        err
-                    })
-                    .with_index(query_index)?;
-
-                let search_result =
-                    tokio::task::spawn_blocking(move || perform_search(&index, query, features))
-                        .await
-                        .with_index(query_index)?;
-
-                search_results.push(SearchResultWithIndex {
-                    index_uid: index_uid.into_inner(),
-                    result: search_result.with_index(query_index)?,
-                });
+            // Check index from API key
+            if !index_scheduler.filters().is_index_authorized(&index_uid) {
+                return Err(AuthenticationError::InvalidToken).with_index(query_index);
            }
-            Ok(search_results)
+            // Apply search rules from tenant token
+            if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid)
+            {
+                add_search_rules(&mut query, search_rules);
+            }
+
+            let index = index_scheduler
+                .index(&index_uid)
+                .map_err(|err| {
+                    let mut err = ResponseError::from(err);
+                    // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
+                    // here the resource not found is not part of the URL.
+                    err.code = StatusCode::BAD_REQUEST;
+                    err
+                })
+                .with_index(query_index)?;
+
+            let distribution = embed(&mut query, index_scheduler.get_ref(), &index)
+                .await
+                .with_index(query_index)?;
+
+            let search_result = tokio::task::spawn_blocking(move || {
+                perform_search(&index, query, features, distribution)
+            })
+            .await
+            .with_index(query_index)?;
+
+            search_results.push(SearchResultWithIndex {
+                index_uid: index_uid.into_inner(),
+                result: search_result.with_index(query_index)?,
+            });
        }
-    })()
+        Ok(search_results)
+    }
    .await;

    if search_results.is_ok() {
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
-use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
-use meilisearch_types::tasks::{
-    serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
-};
+use meilisearch_types::task_view::TaskView;
+use meilisearch_types::tasks::{Kind, KindWithContent, Status};
 use serde::Serialize;
 use serde_json::json;
 use time::format_description::well_known::Rfc3339;
@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
    .service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
    .service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
 }
-
-#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct TaskView {
-    pub uid: TaskId,
-    #[serde(default)]
-    pub index_uid: Option<String>,
-    pub status: Status,
-    #[serde(rename = "type")]
-    pub kind: Kind,
-    pub canceled_by: Option<TaskId>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub details: Option<DetailsView>,
-    pub error: Option<ResponseError>,
-    #[serde(serialize_with = "serialize_duration", default)]
-    pub duration: Option<Duration>,
-    #[serde(with = "time::serde::rfc3339")]
-    pub enqueued_at: OffsetDateTime,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub started_at: Option<OffsetDateTime>,
-    #[serde(with = "time::serde::rfc3339::option", default)]
-    pub finished_at: Option<OffsetDateTime>,
-}
-
-impl TaskView {
-    pub fn from_task(task: &Task) -> TaskView {
-        TaskView {
-            uid: task.uid,
-            index_uid: task.index_uid().map(ToOwned::to_owned),
-            status: task.status,
-            kind: task.kind.as_kind(),
-            canceled_by: task.canceled_by,
-            details: task.details.clone().map(DetailsView::from),
-            error: task.error.clone(),
-            duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
-            enqueued_at: task.enqueued_at,
-            started_at: task.started_at,
-            finished_at: task.finished_at,
-        }
-    }
-}
-
-#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub struct DetailsView {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub received_documents: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub indexed_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub primary_key: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub provided_ids: Option<usize>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_documents: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub matched_tasks: Option<u64>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub canceled_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub deleted_tasks: Option<Option<u64>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub original_filter: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dump_uid: Option<Option<String>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[serde(flatten)]
-    pub settings: Option<Box<Settings<Unchecked>>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub swaps: Option<Vec<IndexSwap>>,
-}
-
-impl From<Details> for DetailsView {
-    fn from(details: Details) -> Self {
-        match details {
-            Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
-                DetailsView {
-                    received_documents: Some(received_documents),
-                    indexed_documents: Some(indexed_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::SettingsUpdate { settings } => {
-                DetailsView { settings: Some(settings), ..DetailsView::default() }
-            }
-            Details::IndexInfo { primary_key } => {
-                DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
-            }
-            Details::DocumentDeletion {
-                provided_ids: received_document_ids,
-                deleted_documents,
-            } => DetailsView {
-                provided_ids: Some(received_document_ids),
-                deleted_documents: Some(deleted_documents),
-                original_filter: Some(None),
-                ..DetailsView::default()
-            },
-            Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
-                DetailsView {
-                    provided_ids: Some(0),
-                    original_filter: Some(Some(original_filter)),
-                    deleted_documents: Some(deleted_documents),
-                    ..DetailsView::default()
-                }
-            }
-            Details::ClearAll { deleted_documents } => {
-                DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
-            }
-            Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    canceled_tasks: Some(canceled_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
-                DetailsView {
-                    matched_tasks: Some(matched_tasks),
-                    deleted_tasks: Some(deleted_tasks),
-                    original_filter: Some(Some(original_filter)),
-                    ..DetailsView::default()
-                }
-            }
-            Details::Dump { dump_uid } => {
-                DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
-            }
-            Details::IndexSwap { swaps } => {
-                DetailsView { swaps: Some(swaps), ..Default::default() }
-            }
-        }
-    }
-}
-
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
 pub struct TasksFilterQuery {
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@ -7,24 +7,21 @@ use deserr::Deserr;
 use either::Either;
 use index_scheduler::RoFeatures;
 use indexmap::IndexMap;
-use log::warn;
 use meilisearch_auth::IndexSearchRules;
 use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
-use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
-use meilisearch_types::milli::{
-    dot_product_similarity, FacetValueHit, InternalError, OrderBy, SearchForFacetValues,
-};
+use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy};
+use meilisearch_types::milli::vector::DistributionShift;
+use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues};
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
 use milli::{
    AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
-    SortError, TermsMatchingStrategy, VectorOrArrayOfVectors, DEFAULT_VALUES_PER_FACET,
+    SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
-use ordered_float::OrderedFloat;
 use regex::Regex;
 use serde::Serialize;
 use serde_json::{json, Value};
@ -39,6 +36,7 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
 pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
+pub const DEFAULT_SEMANTIC_RATIO: fn() -> SemanticRatio = || SemanticRatio(0.5);

 #[derive(Debug, Clone, Default, PartialEq, Deserr)]
 #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
@ -47,6 +45,8 @@ pub struct SearchQuery {
    pub q: Option<String>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchVector>)]
    pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
+    pub hybrid: Option<HybridQuery>,
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@ -87,6 +87,48 @@ pub struct SearchQuery {
    pub attributes_to_search_on: Option<Vec<String>>,
 }

+#[derive(Debug, Clone, Default, PartialEq, Deserr)]
+#[deserr(error = DeserrJsonError<InvalidHybridQuery>, rename_all = camelCase, deny_unknown_fields)]
+pub struct HybridQuery {
+    /// TODO validate that semantic ratio is between 0.0 and 1.0
+    #[deserr(default, error = DeserrJsonError<InvalidSearchSemanticRatio>, default)]
+    pub semantic_ratio: SemanticRatio,
+    #[deserr(default, error = DeserrJsonError<InvalidEmbedder>, default)]
+    pub embedder: Option<String>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
+#[deserr(try_from(f32) = TryFrom::try_from -> InvalidSearchSemanticRatio)]
+pub struct SemanticRatio(f32);
+
+impl Default for SemanticRatio {
+    fn default() -> Self {
+        DEFAULT_SEMANTIC_RATIO()
+    }
+}
+
+impl std::convert::TryFrom<f32> for SemanticRatio {
+    type Error = InvalidSearchSemanticRatio;
+
+    fn try_from(f: f32) -> Result<Self, Self::Error> {
+        // the suggested "fix" is `!(0.0..=1.0).contains(&f)`, which is arguably less readable
+        #[allow(clippy::manual_range_contains)]
+        if f > 1.0 || f < 0.0 {
+            Err(InvalidSearchSemanticRatio)
+        } else {
+            Ok(SemanticRatio(f))
+        }
+    }
+}
+
+impl std::ops::Deref for SemanticRatio {
+    type Target = f32;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
 impl SearchQuery {
    pub fn is_finite_pagination(&self) -> bool {
        self.page.or(self.hits_per_page).is_some()
@ -106,6 +148,8 @@ pub struct SearchQueryWithIndex {
    pub q: Option<String>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
    pub vector: Option<Vec<f32>>,
+    #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
+    pub hybrid: Option<HybridQuery>,
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
    pub offset: usize,
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
@ -171,6 +215,7 @@ impl SearchQueryWithIndex {
            crop_marker,
            matching_strategy,
            attributes_to_search_on,
+            hybrid,
        } = self;
        (
            index_uid,
@ -196,6 +241,7 @@ impl SearchQueryWithIndex {
                crop_marker,
                matching_strategy,
                attributes_to_search_on,
+                hybrid,
                // do not use ..Default::default() here,
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
            },
@ -335,19 +381,44 @@ fn prepare_search<'t>(
    rtxn: &'t RoTxn,
    query: &'t SearchQuery,
    features: RoFeatures,
+    distribution: Option<DistributionShift>,
 ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
    let mut search = index.search(rtxn);

-    if query.vector.is_some() && query.q.is_some() {
-        warn!("Ignoring the query string `q` when used with the `vector` parameter.");
+    if query.vector.is_some() {
+        features.check_vector("Passing `vector` as a query parameter")?;
    }

+    if query.hybrid.is_some() {
+        features.check_vector("Passing `hybrid` as a query parameter")?;
+    }
+
+    if query.hybrid.is_none() && query.q.is_some() && query.vector.is_some() {
+        return Err(MeilisearchHttpError::MissingSearchHybrid);
+    }
+
+    search.distribution_shift(distribution);
+
    if let Some(ref vector) = query.vector {
-        search.vector(vector.clone());
+        match &query.hybrid {
+            // If semantic ratio is 0.0, only the query search will impact the search results,
+            // so skip the vector
+            Some(hybrid) if *hybrid.semantic_ratio == 0.0 => (),
+            _otherwise => {
+                search.vector(vector.clone());
+            }
+        }
    }

-    if let Some(ref query) = query.q {
-        search.query(query);
+    if let Some(ref q) = query.q {
+        match &query.hybrid {
+            // If semantic ratio is 1.0, only the vector search will impact the search results,
+            // so skip the query
+            Some(hybrid) if *hybrid.semantic_ratio == 1.0 => (),
+            _otherwise => {
+                search.query(q);
+            }
+        }
    }

    if let Some(ref searchable) = query.attributes_to_search_on {
@ -360,6 +431,7 @@ fn prepare_search<'t>(
    let max_total_hits = index
        .pagination_max_total_hits(rtxn)
        .map_err(milli::Error::from)?
+        .map(|x| x as usize)
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

    search.exhaustive_number_hits(is_finite_pagination);
@ -373,8 +445,8 @@ fn prepare_search<'t>(
        features.check_score_details()?;
    }

-    if query.vector.is_some() {
-        features.check_vector()?;
+    if let Some(HybridQuery { embedder: Some(embedder), .. }) = &query.hybrid {
+        search.embedder_name(embedder);
    }

    // compute the offset on the limit depending on the pagination mode.
@ -420,15 +492,22 @@ pub fn perform_search(
    index: &Index,
    query: SearchQuery,
    features: RoFeatures,
+    distribution: Option<DistributionShift>,
 ) -> Result<SearchResult, MeilisearchHttpError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

    let (search, is_finite_pagination, max_total_hits, offset) =
-        prepare_search(index, &rtxn, &query, features)?;
+        prepare_search(index, &rtxn, &query, features, distribution)?;

    let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } =
-        search.execute()?;
+        match &query.hybrid {
+            Some(hybrid) => match *hybrid.semantic_ratio {
+                ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?,
+                ratio => search.execute_hybrid(ratio)?,
+            },
+            None => search.execute()?,
+        };

    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

@ -537,13 +616,17 @@ pub fn perform_search(
            insert_geo_distance(sort, &mut document);
        }

-        let semantic_score = match query.vector.as_ref() {
-            Some(vector) => match extract_field("_vectors", &fields_ids_map, obkv)? {
-                Some(vectors) => compute_semantic_score(vector, vectors)?,
-                None => None,
-            },
-            None => None,
-        };
+        let mut semantic_score = None;
+        for details in &score {
+            if let ScoreDetails::Vector(score_details::Vector {
+                target_vector: _,
+                value_similarity: Some((_matching_vector, similarity)),
+            }) = details
+            {
+                semantic_score = Some(*similarity);
+                break;
+            }
+        }

        let ranking_score =
            query.show_ranking_score.then(|| ScoreDetails::global_score(score.iter()));
@ -586,6 +669,7 @@ pub fn perform_search(
            let max_values_by_facet = index
                .max_values_per_facet(&rtxn)
                .map_err(milli::Error::from)?
+                .map(|x| x as usize)
                .unwrap_or(DEFAULT_VALUES_PER_FACET);
            facet_distribution.max_values_per_facet(max_values_by_facet);

@ -645,11 +729,15 @@ pub fn perform_facet_search(
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;

-    let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features)?;
-    let mut facet_search = SearchForFacetValues::new(facet_name, search);
+    let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?;
+    let mut facet_search =
+        SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some());
    if let Some(facet_query) = &facet_query {
        facet_search.query(facet_query);
    }
+    if let Some(max_facets) = index.max_values_per_facet(&rtxn)? {
+        facet_search.max_values(max_facets as usize);
+    }

    Ok(FacetSearchResult {
        facet_hits: facet_search.execute()?,
@ -674,18 +762,6 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
    }
 }

-fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option<f32>> {
-    let vectors = serde_json::from_value(vectors)
-        .map(VectorOrArrayOfVectors::into_array_of_vectors)
-        .map_err(InternalError::SerdeJson)?;
-    Ok(vectors
-        .into_iter()
-        .flatten()
-        .map(|v| OrderedFloat(dot_product_similarity(query, &v)))
-        .max()
-        .map(OrderedFloat::into_inner))
-}
-
 fn compute_formatted_options(
    attr_to_highlight: &HashSet<String>,
    attr_to_crop: &[String],
@ -813,22 +889,6 @@ fn make_document(
    Ok(document)
 }

-/// Extract the JSON value under the field name specified
-/// but doesn't support nested objects.
-fn extract_field(
-    field_name: &str,
-    field_ids_map: &FieldsIdsMap,
-    obkv: obkv::KvReaderU16,
-) -> Result<Option<serde_json::Value>, MeilisearchHttpError> {
-    match field_ids_map.id(field_name) {
-        Some(fid) => match obkv.get(fid) {
-            Some(value) => Ok(serde_json::from_slice(value).map(Some)?),
-            None => Ok(None),
-        },
-        None => Ok(None),
-    }
-}
-
 fn format_fields<'a>(
    document: &Document,
    field_ids_map: &FieldsIdsMap,
@ -840,6 +900,14 @@ fn format_fields<'a>(
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();

+    // reduce the formatted option list to the attributes that should be formatted,
+    // instead of all the attributes to display.
+    let formatting_fields_options: Vec<_> = formatted_options
+        .iter()
+        .filter(|(_, option)| option.should_format())
+        .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
+        .collect();
+
    // select the attributes to retrieve
    let displayable_names =
        displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@ -848,13 +916,15 @@ fn format_fields<'a>(
        // to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
        // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
        // highlighted.
-        let format = formatted_options
+        // Warn: The time to compute the format list scales with the number of fields to format;
+        // cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
+        // d*f where d is the total number of fields to display and f is the total number of fields to format.
+        let format = formatting_fields_options
            .iter()
-            .filter(|(field, _option)| {
-                let name = field_ids_map.name(**field).unwrap();
+            .filter(|(name, _option)| {
                milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
            })
-            .map(|(_, option)| *option)
+            .map(|(_, option)| **option)
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();

@ -951,7 +1021,7 @@ fn format_value<'a>(
                    let value = matcher.format(format_options);
                    Value::String(value.into_owned())
                }
-                None => Value::Number(number),
+                None => Value::String(s),
            }
        }
        value => value,
--- a/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump
+++ b/meilisearch/tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump
--- a/meilisearch/tests/auth/authorization.rs
+++ b/meilisearch/tests/auth/authorization.rs
@ -2,10 +2,12 @@ use std::collections::{HashMap, HashSet};

 use ::time::format_description::well_known::Rfc3339;
 use maplit::{hashmap, hashset};
+use meilisearch::Opt;
 use once_cell::sync::Lazy;
+use tempfile::TempDir;
 use time::{Duration, OffsetDateTime};

-use crate::common::{Server, Value};
+use crate::common::{default_settings, Server, Value};
 use crate::json;

 pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
@ -195,7 +197,9 @@ async fn access_authorized_master_key() {

 #[actix_rt::test]
 async fn access_authorized_restricted_index() {
-    let mut server = Server::new_auth().await;
+    let dir = TempDir::new().unwrap();
+    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
+    let mut server = Server::new_auth_with_options(enable_metrics, dir).await;
    for ((method, route), actions) in AUTHORIZATIONS.iter() {
        for action in actions {
            // create a new API key letting only the needed action.
--- a/meilisearch/tests/common/mod.rs
+++ b/meilisearch/tests/common/mod.rs
@ -5,9 +5,11 @@ pub mod service;

 use std::fmt::{self, Display};

+#[allow(unused)]
 pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
 use meili_snap::json_string;
 use serde::{Deserialize, Serialize};
+#[allow(unused)]
 pub use server::{default_settings, Server};

 #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
@ -62,7 +64,7 @@ impl Display for Value {
        write!(
            f,
            "{}",
-            json_string!(self, { ".enqueuedAt" => "[date]", ".processedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
+            json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
        )
    }
 }
--- a/meilisearch/tests/common/server.rs
+++ b/meilisearch/tests/common/server.rs
@ -202,6 +202,10 @@ impl Server {
    pub async fn set_features(&self, value: Value) -> (Value, StatusCode) {
        self.service.patch("/experimental-features", value).await
    }
+
+    pub async fn get_metrics(&self) -> (Value, StatusCode) {
+        self.service.get("/metrics").await
+    }
 }

 pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
@ -221,7 +225,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
            skip_index_budget: true,
            ..Parser::parse_from(None as Option<&str>)
        },
-        experimental_enable_metrics: true,
+        experimental_enable_metrics: false,
        ..Parser::parse_from(None as Option<&str>)
    }
 }
--- a/meilisearch/tests/documents/add_documents.rs
+++ b/meilisearch/tests/documents/add_documents.rs
@ -1760,6 +1760,181 @@ async fn add_documents_invalid_geo_field() {
      "finishedAt": "[date]"
    }
    "###);
+
+    // The next three tests are related to #4333
+
+    // _geo has a lat and lng, but lng is set to `null`
+    let documents = json!([
+        {
+            "id": "12",
+            "_geo": { "lng": null, "lat": 67}
+        }
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+        @r###"
+    {
+      "uid": 14,
+      "indexUid": "test",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
+        "code": "invalid_document_geo_field",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    // _geo has a lat and lng, but lat is set to `null`
+    let documents = json!([
+        {
+            "id": "12",
+            "_geo": { "lng": 35, "lat": null }
+        }
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+        @r###"
+    {
+      "uid": 15,
+      "indexUid": "test",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
+        "code": "invalid_document_geo_field",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    // _geo has a lat and lng, but both are set to `null`
+    let documents = json!([
+        {
+            "id": "13",
+            "_geo": { "lng": null, "lat": null }
+        }
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+        @r###"
+    {
+      "uid": 16,
+      "indexUid": "test",
+      "status": "failed",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 0
+      },
+      "error": {
+        "message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
+        "code": "invalid_document_geo_field",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+}
+
+// Related to #4333
+#[actix_rt::test]
+async fn add_invalid_geo_and_then_settings() {
+    let server = Server::new().await;
+    let index = server.index("test");
+    index.create(Some("id")).await;
+
+    // _geo is an object whose lat and lng are both set to `null`
+    let documents = json!([
+        {
+            "id": "11",
+            "_geo": { "lat": null, "lng": null },
+        }
+    ]);
+    let (ret, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    let ret = index.wait_task(ret.uid()).await;
+    snapshot!(ret, @r###"
+    {
+      "uid": 1,
+      "indexUid": "test",
+      "status": "succeeded",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 1,
+        "indexedDocuments": 1
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
+    snapshot!(code, @"202 Accepted");
+    let ret = index.wait_task(ret.uid()).await;
+    snapshot!(ret, @r###"
+    {
+      "uid": 2,
+      "indexUid": "test",
+      "status": "failed",
+      "type": "settingsUpdate",
+      "canceledBy": null,
+      "details": {
+        "sortableAttributes": [
+          "_geo"
+        ]
+      },
+      "error": {
+        "message": "Could not parse latitude in the document with the id: `\"11\"`. Was expecting a finite number but instead got `null`.",
+        "code": "invalid_document_geo_field",
+        "type": "invalid_request",
+        "link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
+      },
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
 }

 #[actix_rt::test]
--- a/meilisearch/tests/documents/delete_documents.rs
+++ b/meilisearch/tests/documents/delete_documents.rs
@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
      "canceledBy": null,
      "details": {
        "providedIds": 0,
-        "deletedDocuments": 4,
+        "deletedDocuments": 2,
        "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
      },
      "error": null,
--- a/meilisearch/tests/dumps/data.rs
+++ b/meilisearch/tests/dumps/data.rs
@ -20,6 +20,8 @@ pub enum GetDump {
    RubyGemsWithSettingsV4,

    TestV5,
+
+    TestV6WithExperimental,
 }

 impl GetDump {
@ -68,6 +70,10 @@ impl GetDump {
            GetDump::TestV5 => {
                exist_relative_path!("tests/assets/v5_v0.28.0_test_dump.dump").into()
            }
+            GetDump::TestV6WithExperimental => exist_relative_path!(
+                "tests/assets/v6_v1.6.0_use_deactivated_experimental_setting.dump"
+            )
+            .into(),
        }
    }
 }
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@ -59,6 +59,7 @@ async fn import_dump_v1_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -219,6 +220,7 @@ async fn import_dump_v1_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -365,6 +367,7 @@ async fn import_dump_v1_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -497,6 +500,7 @@ async fn import_dump_v2_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -641,6 +645,7 @@ async fn import_dump_v2_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -784,6 +789,7 @@ async fn import_dump_v2_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -916,6 +922,7 @@ async fn import_dump_v3_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -1060,6 +1067,7 @@ async fn import_dump_v3_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -1203,6 +1211,7 @@ async fn import_dump_v3_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -1335,6 +1344,7 @@ async fn import_dump_v4_movie_raw() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -1479,6 +1489,7 @@ async fn import_dump_v4_movie_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -1622,6 +1633,7 @@ async fn import_dump_v4_rubygems_with_settings() {
      "dictionary": [],
      "synonyms": {},
      "distinctAttribute": null,
+      "proximityPrecision": "byWord",
      "typoTolerance": {
        "enabled": true,
        "minWordSizeForTypos": {
@ -1810,3 +1822,107 @@ async fn import_dump_v5() {
        json_string!(tasks, { ".results[].details.dumpUid" => "[uid]",  ".results[].duration" => "[duration]" ,  ".results[].startedAt" => "[date]" ,  ".results[].finishedAt" => "[date]"  })
    );
 }
+
+#[actix_rt::test]
+async fn import_dump_v6_containing_experimental_features() {
+    let temp = tempfile::tempdir().unwrap();
+
+    let options = Opt {
+        import_dump: Some(GetDump::TestV6WithExperimental.path()),
+        ..default_settings(temp.path())
+    };
+    let mut server = Server::new_auth_with_options(options, temp).await;
+    server.use_api_key("MASTER_KEY");
+
+    let (indexes, code) = server.list_indexes(None, None).await;
+    assert_eq!(code, 200, "{indexes}");
+
+    assert_eq!(indexes["results"].as_array().unwrap().len(), 1);
+    assert_eq!(indexes["results"][0]["uid"], json!("movies"));
+    assert_eq!(indexes["results"][0]["primaryKey"], json!("id"));
+
+    let (response, code) = server.get_features().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "scoreDetails": false,
+      "vectorStore": false,
+      "metrics": false,
+      "exportPuffinReports": false
+    }
+    "###);
+
+    let index = server.index("movies");
+
+    let (response, code) = index.settings().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "displayedAttributes": [
+        "*"
+      ],
+      "searchableAttributes": [
+        "*"
+      ],
+      "filterableAttributes": [],
+      "sortableAttributes": [],
+      "rankingRules": [
+        "words",
+        "typo",
+        "proximity"
+      ],
+      "stopWords": [],
+      "nonSeparatorTokens": [],
+      "separatorTokens": [],
+      "dictionary": [],
+      "synonyms": {},
+      "distinctAttribute": null,
+      "proximityPrecision": "byAttribute",
+      "typoTolerance": {
+        "enabled": true,
+        "minWordSizeForTypos": {
+          "oneTypo": 5,
+          "twoTypos": 9
+        },
+        "disableOnWords": [],
+        "disableOnAttributes": []
+      },
+      "faceting": {
+        "maxValuesPerFacet": 100,
+        "sortFacetValuesBy": {
+          "*": "alpha"
+        }
+      },
+      "pagination": {
+        "maxTotalHits": 1000
+      }
+    }
+    "###);
+
+    // the expected order is [1, 3, 2] instead of [3, 1, 2]
+    // because the attribute scale doesn't make the difference between 1 and 3.
+    index
+        .search(json!({"q": "the soup of day"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
--- a/meilisearch/tests/features/mod.rs
+++ b/meilisearch/tests/features/mod.rs
@ -1,4 +1,7 @@
-use crate::common::Server;
+use meilisearch::Opt;
+use tempfile::TempDir;
+
+use crate::common::{default_settings, Server};
 use crate::json;

 /// Feature name to test against.
@ -16,7 +19,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": false
+      "vectorStore": false,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -26,7 +31,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -36,7 +43,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -47,7 +56,9 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);

@ -58,11 +69,73 @@ async fn experimental_features() {
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
      "scoreDetails": false,
-      "vectorStore": true
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
    }
    "###);
 }

+#[actix_rt::test]
+async fn experimental_feature_metrics() {
+    // instance flag for metrics enables metrics at startup
+    let dir = TempDir::new().unwrap();
+    let enable_metrics = Opt { experimental_enable_metrics: true, ..default_settings(dir.path()) };
+    let server = Server::new_with_options(enable_metrics).await.unwrap();
+
+    let (response, code) = server.get_features().await;
+
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "scoreDetails": false,
+      "vectorStore": false,
+      "metrics": true,
+      "exportPuffinReports": false
+    }
+    "###);
+
+    let (response, code) = server.get_metrics().await;
+    meili_snap::snapshot!(code, @"200 OK");
+
+    // metrics are not returned in json format
+    // so the test server will return null
+    meili_snap::snapshot!(response, @"null");
+
+    // disabling metrics results in invalid request
+    let (response, code) = server.set_features(json!({"metrics": false})).await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response["metrics"], @"false");
+
+    let (response, code) = server.get_metrics().await;
+    meili_snap::snapshot!(code, @"400 Bad Request");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "message": "Getting metrics requires enabling the `metrics` experimental feature. See https://github.com/meilisearch/product/discussions/625",
+      "code": "feature_not_enabled",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+    }
+    "###);
+
+    // enabling metrics via HTTP results in valid request
+    let (response, code) = server.set_features(json!({"metrics": true})).await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response["metrics"], @"true");
+
+    let (response, code) = server.get_metrics().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response, @"null");
+
+    // startup without flag respects persisted metrics value
+    let disable_metrics =
+        Opt { experimental_enable_metrics: false, ..default_settings(dir.path()) };
+    let server_no_flag = Server::new_with_options(disable_metrics).await.unwrap();
+    let (response, code) = server_no_flag.get_metrics().await;
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(response, @"null");
+}
+
 #[actix_rt::test]
 async fn errors() {
    let server = Server::new().await;
@ -73,7 +146,7 @@ async fn errors() {
    meili_snap::snapshot!(code, @"400 Bad Request");
    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
    {
-      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`",
+      "message": "Unknown field `NotAFeature`: expected one of `scoreDetails`, `vectorStore`, `metrics`, `exportPuffinReports`",
      "code": "bad_request",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#bad_request"
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@ -0,0 +1,241 @@
+use meili_snap::snapshot;
+use once_cell::sync::Lazy;
+
+use crate::common::{Server, Value};
+use crate::json;
+
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+    json!([
+      {
+        "id": 1,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Brown"
+      },
+      {
+        "id": 2,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Black"
+      },
+      {
+        "id": 3,
+        "description": "Leather Jacket",
+        "brand": "Lee Jeans",
+        "product_id": "123456",
+        "color": "Blue"
+      },
+      {
+        "id": 4,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Red"
+      },
+      {
+        "id": 5,
+        "description": "T-Shirt",
+        "brand": "Nike",
+        "product_id": "789012",
+        "color": "Blue"
+      },
+      {
+        "id": 6,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "Black"
+      },
+      {
+        "id": 7,
+        "description": "Running Shoes",
+        "brand": "Adidas",
+        "product_id": "456789",
+        "color": "White"
+      },
+      {
+        "id": 8,
+        "description": "Hoodie",
+        "brand": "Puma",
+        "product_id": "987654",
+        "color": "Gray"
+      },
+      {
+        "id": 9,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Green"
+      },
+      {
+        "id": 10,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Red"
+      },
+      {
+        "id": 11,
+        "description": "Sweater",
+        "brand": "Gap",
+        "product_id": "234567",
+        "color": "Blue"
+      },
+      {
+        "id": 12,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Indigo"
+      },
+      {
+        "id": 13,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Black"
+      },
+      {
+        "id": 14,
+        "description": "Jeans",
+        "brand": "Levi's",
+        "product_id": "345678",
+        "color": "Stone Wash"
+      }
+    ])
+});
+
+static DOCUMENT_PRIMARY_KEY: &str = "id";
+static DOCUMENT_DISTINCT_KEY: &str = "product_id";
+
+/// Covers https://github.com/meilisearch/meilisearch/issues/4078: distinct attribute + `offset`.
+#[actix_rt::test]
+async fn distinct_search_with_offset_no_ranking() {
+    let server = Server::new().await;
+    let index = server.index("test");
+    // Index the fixture documents, then declare `product_id` as the distinct attribute.
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
+    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
+    index.wait_task(1).await;
+    // Collects the distinct-attribute value of every hit, in response order.
+    fn get_hits(response: &Value) -> Vec<&str> {
+        let hits_array = response["hits"].as_array().unwrap();
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+    }
+    // Searches without a query walk through the results two at a time using `offset`/`limit`.
+    let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["estimatedTotalHits"] , @"11");
+
+    let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["estimatedTotalHits"], @"10");
+
+    let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+    // Offset 5 leaves a single hit even though `limit` is 2.
+    let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"1");
+    snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+    // Offsets at or beyond the sixth hit return an empty `hits` array with a 200 status.
+    let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["estimatedTotalHits"], @"6");
+}
+
+/// Covers https://github.com/meilisearch/meilisearch/issues/4130: distinct attribute + `page`.
+#[actix_rt::test]
+async fn distinct_search_with_pagination_no_ranking() {
+    let server = Server::new().await;
+    let index = server.index("test");
+    // Index the fixture documents, then declare `product_id` as the distinct attribute.
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
+    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
+    index.wait_task(1).await;
+    // Collects the distinct-attribute value of every hit, in response order.
+    fn get_hits(response: &Value) -> Vec<&str> {
+        let hits_array = response["hits"].as_array().unwrap();
+        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
+    }
+    // Page 0 is accepted but returns no hits; `totalPages` and `totalHits` are still reported.
+    let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"0");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+    // Pages 1 to 3 each hold two of the six distinct hits.
+    let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
+    snapshot!(response["page"], @"1");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+
+    let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"2");
+    snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
+    snapshot!(response["page"], @"3");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+    // A page past `totalPages` is empty, but the totals stay the same.
+    let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"0");
+    snapshot!(format!("{:?}", hits), @r#"[]"#);
+    snapshot!(response["page"], @"4");
+    snapshot!(response["totalPages"], @"3");
+    snapshot!(response["totalHits"], @"6");
+    // With three hits per page there are two pages; page 2 holds the last three hits.
+    let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
+    let hits = get_hits(&response);
+    snapshot!(code, @"200 OK");
+    snapshot!(hits.len(), @"3");
+    snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
+    snapshot!(response["page"], @"2");
+    snapshot!(response["totalPages"], @"2");
+    snapshot!(response["totalHits"], @"6");
+}
--- a/meilisearch/tests/search/facet_search.rs
+++ b/meilisearch/tests/search/facet_search.rs
@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;

-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
@ -105,6 +105,24 @@ async fn more_advanced_facet_search() {
    snapshot!(response["facetHits"].as_array().unwrap().len(), @"1");
 }

+#[actix_rt::test]
+async fn simple_facet_search_with_max_values() {
+    let server = Server::new().await;
+    let index = server.index("test");
+    // Cap the number of values per facet at 1 and make `genres` filterable before indexing.
+    let documents = DOCUMENTS.clone();
+    index.update_settings_faceting(json!({ "maxValuesPerFacet": 1 })).await;
+    index.update_settings_filterable_attributes(json!(["genres"])).await;
+    index.add_documents(documents, None).await;
+    index.wait_task(2).await;
+
+    let (response, code) =
+        index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await;
+    // The test expects exactly one facet hit, matching the `maxValuesPerFacet` of 1 set above.
+    assert_eq!(code, 200, "{}", response);
+    assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
+}
+
 #[actix_rt::test]
 async fn non_filterable_facet_search_error() {
    let server = Server::new().await;
--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;

-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "id": 1,
--- a/meilisearch/tests/search/hybrid.rs
+++ b/meilisearch/tests/search/hybrid.rs
@ -0,0 +1,221 @@
+use meili_snap::snapshot;
+use once_cell::sync::Lazy;
+
+use crate::common::index::Index;
+use crate::common::{Server, Value};
+use crate::json;
+
+async fn index_with_documents<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
+    let index = server.index("test");
+    // Shared setup: enable `vectorStore`, register the embedder, then index `documents`.
+    let (response, code) = server.set_features(json!({"vectorStore": true})).await;
+
+    meili_snap::snapshot!(code, @"200 OK");
+    meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
+    {
+      "scoreDetails": false,
+      "vectorStore": true,
+      "metrics": false,
+      "exportPuffinReports": false
+    }
+    "###);
+    // Register a 2-dimensional `userProvided` embedder named `default`, then wait for the task.
+    let (response, code) = index
+        .update_settings(json!({ "embedders": {"default": {
+                "source": "userProvided",
+                "dimensions": 2}}} ))
+        .await;
+    assert_eq!(202, code, "{:?}", response);
+    index.wait_task(response.uid()).await;
+    // Add the documents and wait for that task too before handing the index back.
+    let (response, code) = index.add_documents(documents.clone(), None).await;
+    assert_eq!(202, code, "{:?}", response);
+    index.wait_task(response.uid()).await;
+    index
+}
+
+static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+    json!([
+    {
+        "title": "Shazam!",
+        "desc": "a Captain Marvel ersatz",
+        "id": "1",
+        "_vectors": {"default": [1.0, 3.0]},
+    },
+    {
+        "title": "Captain Planet",
+        "desc": "He's not part of the Marvel Cinematic Universe",
+        "id": "2",
+        "_vectors": {"default": [1.0, 2.0]},
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "3",
+        "_vectors": {"default": [2.0, 3.0]},
+    }])
+});
+
+static SINGLE_DOCUMENT: Lazy<Value> = Lazy::new(|| {
+    json!([{
+            "title": "Shazam!",
+            "desc": "a Captain Marvel ersatz",
+            "id": "1",
+            "_vectors": {"default": [1.0, 3.0]},
+    }])
+});
+
+#[actix_rt::test]
+async fn simple_search() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+    // Semantic ratio 0.2: the snapshot below carries no `_semanticScore` on the hits.
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]}}]"###);
+    // Semantic ratio 0.8: every hit in the snapshot now carries a `_semanticScore`.
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8}}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_semanticScore":0.9472136}]"###);
+}
+
+#[actix_rt::test]
+async fn highlighter() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+    // Semantic ratio 0.2: query words found in `desc` are wrapped in the custom pre/post tags.
+    let (response, code) = index
+        .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
+            "hybrid": {"semanticRatio": 0.2},
+            "attributesToHighlight": [
+                     "desc"
+                   ],
+                   "highlightPreTag": "**BEGIN**",
+                   "highlightPostTag": "**END**"
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}}}]"###);
+    // Semantic ratio 0.8: highlighting is still applied and hits also carry `_semanticScore`.
+    let (response, code) = index
+        .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
+            "hybrid": {"semanticRatio": 0.8},
+            "attributesToHighlight": [
+                     "desc"
+                   ],
+                   "highlightPreTag": "**BEGIN**",
+                   "highlightPostTag": "**END**"
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}},"_semanticScore":0.9472136}]"###);
+
+    // Semantic ratio 1.0 (fully semantic): `_formatted` is returned but has no highlight tags.
+    let (response, code) = index
+        .search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
+            "hybrid": {"semanticRatio": 1.0},
+            "attributesToHighlight": [
+                     "desc"
+                   ],
+                   "highlightPreTag": "**BEGIN**",
+                   "highlightPostTag": "**END**"
+        }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":[2.0,3.0]},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":["2.0","3.0"]}},"_semanticScore":0.99029034},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":[1.0,2.0]},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":["1.0","2.0"]}},"_semanticScore":0.97434163},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":["1.0","3.0"]}}}]"###);
+}
+
+#[actix_rt::test]
+async fn invalid_semantic_ratio() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
+    // POST body: a ratio above 1.0 is rejected with `invalid_search_semantic_ratio`.
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 1.2}}),
+        )
+        .await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(response, @r###"
+    {
+      "message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
+      "code": "invalid_search_semantic_ratio",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
+    }
+    "###);
+    // POST body: a negative ratio is rejected the same way.
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": -0.8}}),
+        )
+        .await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(response, @r###"
+    {
+      "message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
+      "code": "invalid_search_semantic_ratio",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
+    }
+    "###);
+    // GET query string (`hybridSemanticRatio`): a ratio above 1.0 is rejected.
+    let (response, code) = index
+        .search_get(
+            &yaup::to_string(
+                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": 1.2}),
+            )
+            .unwrap(),
+        )
+        .await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(response, @r###"
+    {
+      "message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
+      "code": "invalid_search_semantic_ratio",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
+    }
+    "###);
+    // GET query string (`hybridSemanticRatio`): a negative ratio is rejected.
+    let (response, code) = index
+        .search_get(
+            &yaup::to_string(
+                &json!({"q": "Captain", "vector": [1.0, 1.0], "hybridSemanticRatio": -0.2}),
+            )
+            .unwrap(),
+        )
+        .await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(response, @r###"
+    {
+      "message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
+      "code": "invalid_search_semantic_ratio",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn single_document() {
+    let server = Server::new().await;
+    let index = index_with_documents(&server, &SINGLE_DOCUMENT).await;
+    // Fully semantic search using the same vector as the only indexed document.
+    let (response, code) = index
+    .search_post(
+        json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0}, "showRankingScore": true}),
+    )
+    .await;
+    // The snapshot pins both `_rankingScore` and `_semanticScore` at 1.0 for that document.
+    snapshot!(code, @"200 OK");
+    snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":[1.0,3.0]},"_rankingScore":1.0,"_semanticScore":1.0}"###);
+}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@ -1,10 +1,12 @@
 // This modules contains all the test concerning search. Each particular feature of the search
 // should be tested in its own module to isolate tests and keep the tests readable.

+mod distinct;
 mod errors;
 mod facet_search;
 mod formatted;
 mod geo;
+mod hybrid;
 mod multi;
 mod pagination;
 mod restrict_searchable;
@ -14,32 +16,37 @@ use once_cell::sync::Lazy;
 use crate::common::{Server, Value};
 use crate::json;

-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "title": "Shazam!",
            "id": "287947",
+            "_vectors": { "manual": [1, 2, 3]},
        },
        {
            "title": "Captain Marvel",
            "id": "299537",
+            "_vectors": { "manual": [1, 2, 54] },
        },
        {
            "title": "Escape Room",
            "id": "522681",
+            "_vectors": { "manual": [10, -23, 32] },
        },
        {
            "title": "How to Train Your Dragon: The Hidden World",
            "id": "166428",
+            "_vectors": { "manual": [-100, 231, 32] },
        },
        {
            "title": "Gläss",
            "id": "450465",
+            "_vectors": { "manual": [-100, 340, 90] },
        }
    ])
 });

-pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
+static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
            "id": 852,
@ -56,6 +63,7 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
                },
            ],
            "cattos": "pésti",
+            "_vectors": { "manual": [1, 2, 3]},
        },
        {
            "id": 654,
@ -68,12 +76,14 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
                },
            ],
            "cattos": ["simba", "pestiféré"],
+            "_vectors": { "manual": [1, 2, 54] },
        },
        {
            "id": 750,
            "father": "romain",
            "mother": "michelle",
            "cattos": ["enigma"],
+            "_vectors": { "manual": [10, 23, 32] },
        },
        {
            "id": 951,
@ -90,6 +100,7 @@ pub(self) static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
                },
            ],
            "cattos": ["moumoute", "gomez"],
+            "_vectors": { "manual": [10, 23, 32] },
        },
    ])
 });
@ -801,6 +812,13 @@ async fn experimental_feature_score_details() {
                  {
                    "title": "How to Train Your Dragon: The Hidden World",
                    "id": "166428",
+                    "_vectors": {
+                      "manual": [
+                        -100,
+                        231,
+                        32
+                      ]
+                    },
                    "_rankingScoreDetails": {
                      "words": {
                        "order": 0,
@ -816,13 +834,13 @@ async fn experimental_feature_score_details() {
                      },
                      "proximity": {
                        "order": 2,
-                        "score": 0.875
+                        "score": 0.75
                      },
                      "attribute": {
                        "order": 3,
                        "attributeRankingOrderScore": 1.0,
                        "queryWordDistanceScore": 0.8095238095238095,
-                        "score": 0.9365079365079364
+                        "score": 0.9727891156462584
                      },
                      "exactness": {
                        "order": 4,
@ -869,13 +887,100 @@ async fn experimental_feature_vector_store() {
    meili_snap::snapshot!(code, @"200 OK");
    meili_snap::snapshot!(response["vectorStore"], @"true");

+    let (response, code) = index
+        .update_settings(json!({"embedders": {
+            "manual": {
+                "source": "userProvided",
+                "dimensions": 3,
+            }
+        }}))
+        .await;
+
+    meili_snap::snapshot!(response, @r###"
+    {
+      "taskUid": 1,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "settingsUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    meili_snap::snapshot!(code, @"202 Accepted");
+    let response = index.wait_task(response.uid()).await;
+
+    meili_snap::snapshot!(meili_snap::json_string!(response["status"]), @"\"succeeded\"");
+
    let (response, code) = index
        .search_post(json!({
            "vector": [1.0, 2.0, 3.0],
        }))
        .await;
+
    meili_snap::snapshot!(code, @"200 OK");
-    meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]");
+    // vector search returns all documents that don't have vectors in the last bucket, like all sorts
+    meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+    [
+      {
+        "title": "Shazam!",
+        "id": "287947",
+        "_vectors": {
+          "manual": [
+            1,
+            2,
+            3
+          ]
+        },
+        "_semanticScore": 1.0
+      },
+      {
+        "title": "Captain Marvel",
+        "id": "299537",
+        "_vectors": {
+          "manual": [
+            1,
+            2,
+            54
+          ]
+        },
+        "_semanticScore": 0.9129112
+      },
+      {
+        "title": "Gläss",
+        "id": "450465",
+        "_vectors": {
+          "manual": [
+            -100,
+            340,
+            90
+          ]
+        },
+        "_semanticScore": 0.8106413
+      },
+      {
+        "title": "How to Train Your Dragon: The Hidden World",
+        "id": "166428",
+        "_vectors": {
+          "manual": [
+            -100,
+            231,
+            32
+          ]
+        },
+        "_semanticScore": 0.74120104
+      },
+      {
+        "title": "Escape Room",
+        "id": "522681",
+        "_vectors": {
+          "manual": [
+            10,
+            -23,
+            32
+          ]
+        }
+      }
+    ]
+    "###);
 }

 #[cfg(feature = "default")]
@ -1125,7 +1230,14 @@ async fn simple_search_with_strange_synonyms() {
            [
              {
                "title": "How to Train Your Dragon: The Hidden World",
-                "id": "166428"
+                "id": "166428",
+                "_vectors": {
+                  "manual": [
+                    -100,
+                    231,
+                    32
+                  ]
+                }
              }
            ]
            "###);
@ -1139,7 +1251,14 @@ async fn simple_search_with_strange_synonyms() {
            [
              {
                "title": "How to Train Your Dragon: The Hidden World",
-                "id": "166428"
+                "id": "166428",
+                "_vectors": {
+                  "manual": [
+                    -100,
+                    231,
+                    32
+                  ]
+                }
              }
            ]
            "###);
@ -1153,7 +1272,14 @@ async fn simple_search_with_strange_synonyms() {
            [
              {
                "title": "How to Train Your Dragon: The Hidden World",
-                "id": "166428"
+                "id": "166428",
+                "_vectors": {
+                  "manual": [
+                    -100,
+                    231,
+                    32
+                  ]
+                }
              }
            ]
            "###);
--- a/meilisearch/tests/search/multi.rs
+++ b/meilisearch/tests/search/multi.rs
@ -72,7 +72,14 @@ async fn simple_search_single_index() {
        "hits": [
          {
            "title": "Gläss",
-            "id": "450465"
+            "id": "450465",
+            "_vectors": {
+              "manual": [
+                -100,
+                340,
+                90
+              ]
+            }
          }
        ],
        "query": "glass",
@ -86,7 +93,14 @@ async fn simple_search_single_index() {
        "hits": [
          {
            "title": "Captain Marvel",
-            "id": "299537"
+            "id": "299537",
+            "_vectors": {
+              "manual": [
+                1,
+                2,
+                54
+              ]
+            }
          }
        ],
        "query": "captain",
@ -177,7 +191,14 @@ async fn simple_search_two_indexes() {
        "hits": [
          {
            "title": "Gläss",
-            "id": "450465"
+            "id": "450465",
+            "_vectors": {
+              "manual": [
+                -100,
+                340,
+                90
+              ]
+            }
          }
        ],
        "query": "glass",
@ -203,7 +224,14 @@ async fn simple_search_two_indexes() {
                "age": 4
              }
            ],
-            "cattos": "pésti"
+            "cattos": "pésti",
+            "_vectors": {
+              "manual": [
+                1,
+                2,
+                3
+              ]
+            }
          },
          {
            "id": 654,
@ -218,7 +246,14 @@ async fn simple_search_two_indexes() {
            "cattos": [
              "simba",
              "pestiféré"
-            ]
+            ],
+            "_vectors": {
+              "manual": [
+                1,
+                2,
+                54
+              ]
+            }
          }
        ],
        "query": "pésti",
--- a/meilisearch/tests/search/restrict_searchable.rs
+++ b/meilisearch/tests/search/restrict_searchable.rs
@ -335,3 +335,35 @@ async fn exactness_ranking_rule_order() {
        })
        .await;
 }
+
+#[actix_rt::test]
+async fn search_on_exact_field() {
+    let server = Server::new().await;
+    let index = index_with_documents(
+        &server,
+        &json!([
+        {
+            "title": "Captain Marvel",
+            "exact": "Captain Marivel",
+            "id": "1",
+        },
+        {
+            "title": "Captain Marivel",
+            "exact": "Captain the Marvel",
+            "id": "2",
+        }]),
+    )
+    .await;
+    // Disable typo tolerance on the `exact` attribute only.
+    let (response, code) =
+        index.update_settings_typo_tolerance(json!({ "disableOnAttributes": ["exact"] })).await;
+    assert_eq!(202, code, "{:?}", response);
+    index.wait_task(1).await;
+    // Searching on an exact attribute should only return the document that matches without a typo.
+    index
+        .search(json!({"q": "Marvel", "attributesToSearchOn": ["exact"]}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(response["hits"].as_array().unwrap().len(), @"1");
+        })
+        .await;
+}
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@ -54,7 +54,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 14);
+    assert_eq!(settings.keys().len(), 15);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@ -83,6 +83,7 @@ async fn get_settings() {
            "maxTotalHits": 1000,
        })
    );
+    assert_eq!(settings["proximityPrecision"], json!("byWord"));
 }

 #[actix_rt::test]
--- a/meilisearch/tests/settings/mod.rs
+++ b/meilisearch/tests/settings/mod.rs
@ -1,4 +1,5 @@
 mod distinct;
 mod errors;
 mod get_settings;
+mod proximity_settings;
 mod tokenizer_customization;
--- a/meilisearch/tests/settings/proximity_settings.rs
+++ b/meilisearch/tests/settings/proximity_settings.rs
@ -0,0 +1,352 @@
+use meili_snap::{json_string, snapshot};
+use once_cell::sync::Lazy;
+
+use crate::common::Server;
+use crate::json;
+
+/// Fixture shared by the tests of this file: three documents whose `a` and `b`
+/// fields contain nearly the same words, differing only by word order or by a
+/// single word. The tests index it and snapshot the order in which the
+/// documents come back for a given `proximityPrecision` setting.
+static DOCUMENTS: Lazy<crate::common::Value> = Lazy::new(|| {
+    json!([
+        {
+            "id": 1,
+            "a": "Soup of the day",
+            "b": "many the fish",
+        },
+        {
+            "id": 2,
+            "a": "Soup of day",
+            "b": "many the lazy fish",
+        },
+        {
+            "id": 3,
+            "a": "the Soup of day",
+            "b": "many the fish",
+        },
+    ])
+});
+
+/// Plain (non-phrase) queries with `proximityPrecision` set to `"byAttribute"`
+/// and the ranking rules restricted to `words`, `typo` and `proximity`.
+/// The expected orders are pinned by the inline snapshots below.
+#[actix_rt::test]
+async fn attribute_scale_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Index the fixture and wait for that task (awaited here as task 0).
+    index.add_documents(DOCUMENTS.clone(), None).await;
+    index.wait_task(0).await;
+
+    // Switch the proximity precision; the update must be accepted (202)
+    // and is awaited as task 1 before searching.
+    let (response, code) = index
+        .update_settings(json!({
+            "proximityPrecision": "byAttribute",
+            "rankingRules": ["words", "typo", "proximity"],
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{:?}", response);
+    index.wait_task(1).await;
+
+    // the expected order is [1, 3, 2] instead of [3, 1, 2]
+    // because the attribute scale doesn't make the difference between 1 and 3.
+    index
+        .search(json!({"q": "the soup of day"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // the expected order is [1, 2, 3] instead of [1, 3, 2]
+    // because the attribute scale sees all the words in the same attribute
+    // and so doesn't make the difference between the documents.
+    index
+        .search(json!({"q": "many the fish"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
+
+/// Phrase queries with `proximityPrecision` set to `"byAttribute"` and the
+/// ranking rules restricted to `words`, `typo` and `proximity`.
+/// The expected hits and their order are pinned by the inline snapshots below.
+#[actix_rt::test]
+async fn attribute_scale_phrase_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Index the fixture and wait for that task (awaited here as task 0).
+    index.add_documents(DOCUMENTS.clone(), None).await;
+    index.wait_task(0).await;
+
+    let (response, code) = index
+        .update_settings(json!({
+            "proximityPrecision": "byAttribute",
+            "rankingRules": ["words", "typo", "proximity"],
+        }))
+        .await;
+    // Fail right here if the settings update is rejected, instead of letting
+    // the snapshots below fail with a misleading ordering diff.
+    assert_eq!("202", code.as_str(), "{:?}", response);
+    index.wait_task(1).await;
+
+    // the expected order is [1, 3] instead of [3, 1]
+    // because the attribute scale doesn't make the difference between 1 and 3.
+    // But 2 shouldn't be returned because "the" is not in the same attribute.
+    index
+        .search(json!({"q": "\"the soup of day\""}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // the expected order is [1, 2, 3] instead of [1, 3]
+    // because the attribute scale sees all the words in the same attribute
+    // and so doesn't make the difference between the documents.
+    index
+        .search(json!({"q": "\"many the fish\""}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
+
+/// Sets `proximityPrecision` to `"byAttribute"`, then back to `"byWord"`, and
+/// snapshots plain and phrase queries afterwards to check that the word-level
+/// behaviour is in effect again.
+#[actix_rt::test]
+async fn word_scale_set_and_reset() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Index the fixture and wait for that task (awaited here as task 0).
+    index.add_documents(DOCUMENTS.clone(), None).await;
+    index.wait_task(0).await;
+
+    // Set and reset the setting ensuring the swap between the 2 settings is applied.
+    let (response, code) = index
+        .update_settings(json!({
+            "proximityPrecision": "byAttribute",
+            "rankingRules": ["words", "typo", "proximity"],
+        }))
+        .await;
+    // Both updates must be accepted, otherwise the snapshots below would not
+    // be exercising the set-then-reset scenario at all.
+    assert_eq!("202", code.as_str(), "{:?}", response);
+    index.wait_task(1).await;
+
+    let (response, code) = index
+        .update_settings(json!({
+            "proximityPrecision": "byWord",
+            "rankingRules": ["words", "typo", "proximity"],
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{:?}", response);
+    index.wait_task(2).await;
+
+    // [3, 1, 2]
+    index
+        .search(json!({"q": "the soup of day"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              },
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // [1, 3, 2]
+    index
+        .search(json!({"q": "many the fish"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // [3]
+    index
+        .search(json!({"q": "\"the soup of day\""}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // [1, 3]
+    index
+        .search(json!({"q": "\"many the fish\""}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
+
+/// `proximityPrecision` set to `"byAttribute"` while leaving the ranking rules
+/// untouched (only the precision is sent in the settings update).
+/// The expected orders are pinned by the inline snapshots below.
+#[actix_rt::test]
+async fn attribute_scale_default_ranking_rules() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Index the fixture and wait for that task (awaited here as task 0).
+    index.add_documents(DOCUMENTS.clone(), None).await;
+    index.wait_task(0).await;
+
+    // The update must be accepted (202) and is awaited as task 1 before searching.
+    let (response, code) = index
+        .update_settings(json!({
+            "proximityPrecision": "byAttribute"
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{:?}", response);
+    index.wait_task(1).await;
+
+    // the expected order is [3, 1, 2]
+    index
+        .search(json!({"q": "the soup of day"}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              },
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    // the expected order is [1, 3, 2] instead of [1, 3]
+    // because the attribute scale sees all the words in the same attribute
+    // and so doesn't remove the document 2.
+    index
+        .search(json!({"q": "\"many the fish\""}), |response, code| {
+            snapshot!(code, @"200 OK");
+            snapshot!(json_string!(response["hits"]), @r###"
+            [
+              {
+                "id": 1,
+                "a": "Soup of the day",
+                "b": "many the fish"
+              },
+              {
+                "id": 3,
+                "a": "the Soup of day",
+                "b": "many the fish"
+              },
+              {
+                "id": 2,
+                "a": "Soup of day",
+                "b": "many the lazy fish"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
--- a/meilisearch/tests/tasks/mod.rs
+++ b/meilisearch/tests/tasks/mod.rs
@ -1,4 +1,5 @@
 mod errors;
+mod webhook;

 use meili_snap::insta::assert_json_snapshot;
 use time::format_description::well_known::Rfc3339;
--- a/meilisearch/tests/tasks/webhook.rs
+++ b/meilisearch/tests/tasks/webhook.rs
@ -0,0 +1,123 @@
+//! To test the webhook, we need to spawn a new server with a URL listening for
+//! post requests. The webhook handle starts a server and forwards all the
+//! received requests into a channel for you to handle.
+
+use std::sync::Arc;
+
+use actix_http::body::MessageBody;
+use actix_web::dev::{ServiceFactory, ServiceResponse};
+use actix_web::web::{Bytes, Data};
+use actix_web::{post, App, HttpResponse, HttpServer};
+use meili_snap::{json_string, snapshot};
+use meilisearch::Opt;
+use tokio::sync::mpsc;
+use url::Url;
+
+use crate::common::{default_settings, Server};
+use crate::json;
+
+/// Handler for `POST /`: copies the raw request body into the channel shared
+/// through the application data, then answers `200 OK` with an empty body.
+///
+/// Panics if the channel cannot accept the message.
+#[post("/")]
+async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
+    sender.send(body.to_vec()).unwrap();
+    HttpResponse::Ok().finish()
+}
+
+/// Builds the actix application used by the webhook test server: a single
+/// `forward_body` service, with `sender` registered as application data so
+/// the handler can extract it.
+fn create_app(
+    sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
+) -> actix_web::App<
+    impl ServiceFactory<
+        actix_web::dev::ServiceRequest,
+        Config = (),
+        Response = ServiceResponse<impl MessageBody>,
+        Error = actix_web::Error,
+        InitError = (),
+    >,
+> {
+    let shared_sender = Data::from(sender);
+    App::new().service(forward_body).app_data(shared_sender)
+}
+
+/// What `create_webhook_server` hands back to a test so it can point
+/// Meilisearch at the webhook server and read what the server received.
+struct WebhookHandle {
+    /// Join handle of the spawned task running the HTTP server.
+    pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
+    /// URL (scheme, address and port) the server was bound to.
+    pub url: String,
+    /// Receiving end of the channel fed with the body of every request.
+    pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
+}
+
+/// Starts a local HTTP server on an OS-assigned port that forwards the body of
+/// every `POST /` request into an unbounded channel.
+///
+/// Returns the handle of the spawned server task, the URL to reach the server
+/// and the receiving end of the channel.
+///
+/// Panics if the server cannot be bound to `127.0.0.1`.
+async fn create_webhook_server() -> WebhookHandle {
+    let mut log_builder = env_logger::Builder::new();
+    log_builder.parse_filters("info");
+    // `init` panics when a global logger is already installed, which happens as
+    // soon as this helper (or any other logger setup) runs twice in the same
+    // test binary. Logging is only a debugging aid here, so a failure to
+    // install the logger is deliberately ignored.
+    let _ = log_builder.try_init();
+
+    let (sender, receiver) = mpsc::unbounded_channel();
+    let sender = Arc::new(sender);
+
+    // By listening on the port 0, the system will give us any available port.
+    let server =
+        HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
+    // Read back the address actually bound so the URL carries the real port.
+    let (ip, scheme) = server.addrs_with_scheme()[0];
+    let url = format!("{scheme}://{ip}/");
+
+    let server_handle = tokio::spawn(server.run());
+    WebhookHandle { server_handle, url, receiver }
+}
+
+/// Spawns a Meilisearch server configured with a task webhook URL, enqueues
+/// five document additions and checks that the webhook server receives five
+/// task payloads, each matching the snapshot below.
+#[actix_web::test]
+async fn test_basic_webhook() {
+    let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
+
+    let db_path = tempfile::tempdir().unwrap();
+    let webhook_url = Url::parse(&url).unwrap();
+    let options = Opt { task_webhook_url: Some(webhook_url), ..default_settings(db_path.path()) };
+    let server = Server::new_with_options(options).await.unwrap();
+
+    let index = server.index("tamo");
+    // May be flaky: we're relying on the fact that while the first document addition is processed, the other
+    // operations will be received and will be batched together. If it doesn't happen it's not a problem
+    // the rest of the test won't assume anything about the number of tasks per batch.
+    for i in 0..5 {
+        let _ = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
+    }
+
+    let mut nb_tasks = 0;
+    while let Some(raw_payload) = receiver.recv().await {
+        let payload = String::from_utf8(raw_payload).unwrap();
+        // A payload holds one JSON task per line; the first empty line marks its end.
+        for line in payload.split('\n').take_while(|line| !line.is_empty()) {
+            nb_tasks += 1;
+            let task: serde_json::Value = serde_json::from_str(line).unwrap();
+            snapshot!(
+                json_string!(task, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
+            @r###"
+            {
+              "uid": "[uid]",
+              "indexUid": "tamo",
+              "status": "succeeded",
+              "type": "documentAdditionOrUpdate",
+              "canceledBy": null,
+              "details": {
+                "receivedDocuments": 1,
+                "indexedDocuments": 1
+              },
+              "error": null,
+              "duration": "[duration]",
+              "enqueuedAt": "[date]",
+              "startedAt": "[date]",
+              "finishedAt": "[date]"
+            }
+            "###);
+        }
+        // Stop listening once every enqueued task has been reported.
+        if nb_tasks == 5 {
+            break;
+        }
+    }
+
+    assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
+
+    server_handle.abort();
+}
--- a/meilitool/Cargo.toml
+++ b/meilitool/Cargo.toml
@ -0,0 +1,19 @@
+[package]
+name = "meilitool"
+description = "A CLI to edit a Meilisearch database from the command line"
+version.workspace = true
+authors.workspace = true
+homepage.workspace = true
+readme.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+anyhow = "1.0.75"
+clap = { version = "4.2.1", features = ["derive"] }
+dump = { path = "../dump" }
+file-store = { path = "../file-store" }
+meilisearch-auth = { path = "../meilisearch-auth" }
+meilisearch-types = { path = "../meilisearch-types" }
+time = { version = "0.3.30", features = ["formatting"] }
+uuid = { version = "1.5.0", features = ["v4"], default-features = false }
--- a/meilitool/src/main.rs
+++ b/meilitool/src/main.rs
@ -0,0 +1,314 @@
+use std::fs::{read_dir, read_to_string, remove_file, File};
+use std::io::BufWriter;
+use std::path::PathBuf;
+
+use anyhow::Context;
+use clap::{Parser, Subcommand};
+use dump::{DumpWriter, IndexMetadata};
+use file_store::FileStore;
+use meilisearch_auth::AuthController;
+use meilisearch_types::heed::types::{SerdeJson, Str};
+use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
+use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
+use meilisearch_types::milli::{obkv_to_json, BEU32};
+use meilisearch_types::tasks::{Status, Task};
+use meilisearch_types::versioning::check_version_file;
+use meilisearch_types::Index;
+use time::macros::format_description;
+use time::OffsetDateTime;
+use uuid_codec::UuidCodec;
+
+mod uuid_codec;
+
+// Command-line interface of `meilitool`, parsed by clap's derive macros.
+// NOTE: the `///` comments on the fields below are picked up by clap as the
+// `--help` text, so they are user-facing and must be edited with that in mind.
+#[derive(Parser)]
+#[command(author, version, about, long_about = None)]
+struct Cli {
+    /// The database path where the Meilisearch is running.
+    #[arg(long, default_value = "data.ms/")]
+    db_path: PathBuf,
+
+    #[command(subcommand)]
+    command: Command,
+}
+
+// Subcommands of `meilitool`.
+// NOTE: the `///` comments below are rendered by clap as the `--help` text of
+// each subcommand and flag; keep maintainer-only notes in `//` comments.
+#[derive(Subcommand)]
+enum Command {
+    /// Clears the task queue and makes it empty.
+    ///
+    /// This command can be safely executed even if Meilisearch is running and processing tasks.
+    /// Once the task queue is empty you can restart Meilisearch and no more tasks must be visible,
+    /// even the ones that were processing. However, it's highly possible that you see the processing
+    /// tasks in the queue again with an associated internal error message.
+    ClearTaskQueue,
+
+    /// Exports a dump from the Meilisearch database.
+    ///
+    /// Make sure to run this command when Meilisearch is not running or running but not processing tasks.
+    /// If tasks are being processed while a dump is being exported there are chances for the dump to be
+    /// malformed with missing tasks.
+    //
+    // TODO Verify this claim or make sure it cannot happen and we can export dumps
+    //      without caring about killing Meilisearch first!
+    ExportADump {
+        /// The directory in which the dump will be created.
+        #[arg(long, default_value = "dumps/")]
+        dump_dir: PathBuf,
+
+        /// Skip dumping the enqueued or processing tasks.
+        ///
+        /// Can be useful when there are a lot of them and it is not particularly useful
+        /// to keep them. Note that only the enqueued tasks take up space so skipping
+        /// the processed ones is not particularly interesting.
+        #[arg(long)]
+        skip_enqueued_tasks: bool,
+    },
+}
+
+/// Entry point: parses the command line, checks the version file of the
+/// database and runs the requested subcommand.
+fn main() -> anyhow::Result<()> {
+    let cli = Cli::parse();
+
+    // Refuse to go any further when the version check fails.
+    check_version_file(&cli.db_path).context("While checking the version file")?;
+
+    let db_path = cli.db_path;
+    match cli.command {
+        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
+            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
+        }
+        Command::ClearTaskQueue => clear_task_queue(db_path),
+    }
+}
+
+/// Clears the task queue located at `db_path`.
+///
+/// All the task databases found in `<db_path>/tasks` are emptied within a single
+/// write transaction, then every entry of `<db_path>/update_files` is removed.
+/// Progress is reported on stderr.
+///
+/// # Errors
+///
+/// Fails when the environment or one of the databases cannot be opened or cleared,
+/// when the transaction cannot be committed, or when the `update_files` directory
+/// cannot be listed. A content file that cannot be deleted is only reported on
+/// stderr and does not abort the operation.
+fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
+    let path = db_path.join("tasks");
+    let env = EnvOpenOptions::new()
+        .max_dbs(100)
+        .open(&path)
+        .with_context(|| format!("While trying to open {:?}", path.display()))?;
+
+    eprintln!("Deleting tasks from the database...");
+
+    let mut wtxn = env.write_txn()?;
+    // Open every database first: a missing one aborts before anything is cleared.
+    let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
+    // Number of entries about to be removed, only used for the final report.
+    let total = all_tasks.len(&wtxn)?;
+    let status = try_opening_poly_database(&env, &wtxn, "status")?;
+    let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
+    let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
+    let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
+    let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
+    let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
+    let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
+
+    try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
+    try_clearing_poly_database(&mut wtxn, status, "status")?;
+    try_clearing_poly_database(&mut wtxn, kind, "kind")?;
+    try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
+    try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
+    try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
+    try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
+    try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
+
+    wtxn.commit().context("While committing the transaction")?;
+
+    eprintln!("Successfully deleted {total} tasks from the tasks database!");
+    eprintln!("Deleting the content files from disk...");
+
+    let mut count = 0usize;
+    let update_files = db_path.join("update_files");
+    let entries = read_dir(&update_files).with_context(|| {
+        format!("While trying to read the content of {:?}", update_files.display())
+    })?;
+    // Best effort: keep deleting the remaining files when one of them fails.
+    for result in entries {
+        match result {
+            Ok(ent) => match remove_file(ent.path()) {
+                Ok(_) => count += 1,
+                Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
+            },
+            Err(e) => {
+                eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
+            }
+        }
+    }
+
+    eprintln!("Successfully deleted {count} content files from disk!");
+
+    Ok(())
+}
+
+/// Opens the typed database named `db_name` in `env`.
+///
+/// Fails with a dedicated message both when the open call itself errors and
+/// when it succeeds but reports that the database does not exist.
+fn try_opening_database<KC: 'static, DC: 'static>(
+    env: &Env,
+    rtxn: &RoTxn,
+    db_name: &str,
+) -> anyhow::Result<Database<KC, DC>> {
+    let maybe_database = env
+        .open_database(rtxn, Some(db_name))
+        .with_context(|| format!("While opening the {db_name:?} database"))?;
+
+    maybe_database.with_context(|| format!("Missing the {db_name:?} database"))
+}
+
+/// Opens the database named `db_name` in `env` without specifying its key and
+/// value codecs.
+///
+/// Fails with a dedicated message both when the open call itself errors and
+/// when it succeeds but reports that the database does not exist.
+fn try_opening_poly_database(
+    env: &Env,
+    rtxn: &RoTxn,
+    db_name: &str,
+) -> anyhow::Result<Database<Unspecified, Unspecified>> {
+    let maybe_database = env
+        .database_options()
+        .name(db_name)
+        .open(rtxn)
+        .with_context(|| format!("While opening the {db_name:?} poly database"))?;
+
+    maybe_database.with_context(|| format!("Missing the {db_name:?} poly database"))
+}
+
+/// Removes every entry of `database` within `wtxn`; `db_name` is only used to
+/// build the error message.
+fn try_clearing_poly_database(
+    wtxn: &mut RwTxn,
+    database: Database<Unspecified, Unspecified>,
+    db_name: &str,
+) -> anyhow::Result<()> {
+    let outcome = database.clear(wtxn);
+    outcome.with_context(|| format!("While clearing the {db_name:?} database"))
+}
+
+/// Exports a dump of the database located at `db_path` into the dump directory.
+///
+/// The steps are, in order: read the instance UID (a missing or unparsable one is
+/// only reported on stderr), dump the API keys, dump the task queue together with
+/// the content files of the enqueued tasks (unless `skip_enqueued_tasks` is set),
+/// dump every index with its documents and settings, and finally persist the
+/// archive as `<dump_dir>/<timestamp>.dump`, creating `dump_dir` when it is missing.
+/// Progress is reported on stderr.
+///
+/// # Errors
+///
+/// Fails as soon as one of the stores cannot be opened or read, or when the dump
+/// directory or the dump file cannot be created or written.
+fn export_a_dump(
+    db_path: PathBuf,
+    dump_dir: PathBuf,
+    skip_enqueued_tasks: bool,
+) -> Result<(), anyhow::Error> {
+    // Captured up front: this timestamp becomes the name of the dump file.
+    let started_at = OffsetDateTime::now_utc();
+
+    // 1. Extracts the instance UID from disk
+    let instance_uid_path = db_path.join("instance-uid");
+    let instance_uid = match read_to_string(&instance_uid_path) {
+        Ok(content) => match content.trim().parse() {
+            Ok(uuid) => Some(uuid),
+            Err(e) => {
+                eprintln!("Impossible to parse instance-uid: {e}");
+                None
+            }
+        },
+        Err(e) => {
+            eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
+            None
+        }
+    };
+
+    let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
+    let file_store =
+        FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
+
+    let index_scheduler_path = db_path.join("tasks");
+    let env = EnvOpenOptions::new()
+        .max_dbs(100)
+        .open(&index_scheduler_path)
+        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
+
+    eprintln!("Dumping the keys...");
+
+    // 2. dump the keys
+    let auth_store = AuthController::new(&db_path, &None)
+        .with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
+    let mut dump_keys = dump.create_keys()?;
+    let mut count = 0;
+    for key in auth_store.list_keys()? {
+        dump_keys.push_key(&key)?;
+        count += 1;
+    }
+    dump_keys.flush()?;
+
+    eprintln!("Successfully dumped {count} keys!");
+
+    let rtxn = env.read_txn()?;
+    let all_tasks: Database<BEU32, SerdeJson<Task>> =
+        try_opening_database(&env, &rtxn, "all-tasks")?;
+    let index_mapping: Database<Str, UuidCodec> =
+        try_opening_database(&env, &rtxn, "index-mapping")?;
+
+    if skip_enqueued_tasks {
+        eprintln!("Skip dumping the enqueued tasks...");
+    } else {
+        eprintln!("Dumping the enqueued tasks...");
+
+        // 3. dump the tasks
+        let mut dump_tasks = dump.create_tasks_queue()?;
+        // NOTE: every task is pushed to the dump, but `count` only tracks the
+        // enqueued tasks whose content file was copied.
+        let mut count = 0;
+        for ret in all_tasks.iter(&rtxn)? {
+            let (_, t) = ret?;
+            // Read what is needed from the task before `t.into()` consumes it.
+            let status = t.status;
+            let content_file = t.content_uuid();
+            let mut dump_content_file = dump_tasks.push_task(&t.into())?;
+
+            // 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
+            if let Some(content_file_uuid) = content_file {
+                if status == Status::Enqueued {
+                    let content_file = file_store.get_update(content_file_uuid)?;
+
+                    let reader =
+                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
+                            format!("While reading content file {:?}", content_file_uuid)
+                        })?;
+
+                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
+                    while let Some(doc) = cursor.next_document().with_context(|| {
+                        format!("While iterating on content file {:?}", content_file_uuid)
+                    })? {
+                        dump_content_file
+                            .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
+                    }
+                    dump_content_file.flush()?;
+                    count += 1;
+                }
+            }
+        }
+        dump_tasks.flush()?;
+
+        eprintln!("Successfully dumped {count} enqueued tasks!");
+    }
+
+    eprintln!("Dumping the indexes...");
+
+    // 4. Dump the indexes
+    let mut count = 0;
+    for result in index_mapping.iter(&rtxn)? {
+        let (uid, uuid) = result?;
+        let index_path = db_path.join("indexes").join(uuid.to_string());
+        let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
+            format!("While trying to open the index at path {:?}", index_path.display())
+        })?;
+
+        // This transaction belongs to the index environment and shadows the
+        // task-queue one for the rest of the iteration.
+        let rtxn = index.read_txn()?;
+        let metadata = IndexMetadata {
+            uid: uid.to_owned(),
+            primary_key: index.primary_key(&rtxn)?.map(String::from),
+            created_at: index.created_at(&rtxn)?,
+            updated_at: index.updated_at(&rtxn)?,
+        };
+        let mut index_dumper = dump.create_index(uid, &metadata)?;
+
+        let fields_ids_map = index.fields_ids_map(&rtxn)?;
+        let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+
+        // 4.1. Dump the documents
+        for ret in index.all_documents(&rtxn)? {
+            let (_id, doc) = ret?;
+            let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+            index_dumper.push_document(&document)?;
+        }
+
+        // 4.2. Dump the settings
+        let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
+        index_dumper.settings(&settings)?;
+        count += 1;
+    }
+
+    eprintln!("Successfully dumped {count} indexes!");
+    // We will not dump experimental feature settings
+    eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
+
+    // Report a formatting failure as an error instead of panicking.
+    let dump_uid = started_at
+        .format(format_description!(
+            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
+        ))
+        .context("While formatting the dump uid")?;
+
+    // The dump directory (`dumps/` by default) may not exist yet; creating the
+    // file below would then fail after all the work has already been done.
+    std::fs::create_dir_all(&dump_dir).with_context(|| {
+        format!("While creating the dump directory {:?}", dump_dir.display())
+    })?;
+
+    let path = dump_dir.join(format!("{}.dump", dump_uid));
+    let file = File::create(&path)
+        .with_context(|| format!("While creating the dump file {:?}", path.display()))?;
+    dump.persist_to(BufWriter::new(file))?;
+
+    eprintln!("Dump exported at path {:?}", path.display());
+
+    Ok(())
+}
--- a/meilitool/src/uuid_codec.rs
+++ b/meilitool/src/uuid_codec.rs
@ -0,0 +1,24 @@
+use std::borrow::Cow;
+use std::convert::TryInto;
+
+use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
+use uuid::Uuid;
+
+/// A heed codec for `Uuid` values: a UUID is stored as its raw bytes and read
+/// back from a byte slice of the matching length.
+pub struct UuidCodec;
+
+impl<'a> BytesDecode<'a> for UuidCodec {
+    type DItem = Uuid;
+
+    /// Rebuilds a `Uuid` from `bytes`; fails when the slice cannot be
+    /// converted into the fixed-size array a `Uuid` is made of.
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        let raw: [u8; 16] = bytes.try_into()?;
+        Ok(Uuid::from_bytes(raw))
+    }
+}
+
+impl BytesEncode<'_> for UuidCodec {
+    type EItem = Uuid;
+
+    /// Exposes the bytes of `item` without copying them; never fails.
+    fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
+        let raw: &[u8] = item.as_bytes();
+        Ok(Cow::Borrowed(raw))
+    }
+}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@ -17,23 +17,25 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.3", default-features = false }
+charabia = { version = "0.8.5", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
-deserr = { version = "0.6.0", features = ["actix-web"]}
+deserr = "0.6.0"
 either = { version = "1.8.1", features = ["serde"] }
 flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
-grenad = { version = "0.4.4", default-features = false, features = [
+grenad = { version = "0.4.5", default-features = false, features = [
+    "rayon",
    "tempfile",
 ] }
-heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
-    "lmdb", "read-txn-no-tls"
+heed = { version = "0.20.0-alpha.9", default-features = false, features = [
+    "serde-json",
+    "serde-bincode",
+    "read-txn-no-tls",
 ] }
 indexmap = { version = "2.0.0", features = ["serde"] }
-instant-distance = { version = "0.6.1", features = ["with-serde"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
 memmap2 = "0.7.1"
@ -72,6 +74,23 @@ puffin = "0.16.0"
 log = "0.4.17"
 logging_timer = "1.1.0"
 csv = "1.2.1"
+candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
+candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
+candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
+tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
+hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
+    "online",
+] }
+tokio = { version = "1.34.0", features = ["rt"] }
+futures = "0.3.29"
+reqwest = { version = "0.11.16", features = [
+    "rustls-tls",
+    "json",
+], default-features = false }
+tiktoken-rs = "0.5.7"
+liquid = "0.26.4"
+arroy = { git = "https://github.com/meilisearch/arroy.git", version = "0.1.0" }
+rand = "0.8.5"

 [dev-dependencies]
 mimalloc = { version = "0.1.37", default-features = false }
@ -79,10 +98,19 @@ big_s = "1.0.2"
 insta = "1.29.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
+meili-snap = { path = "../meili-snap" }
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
-all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
+all-tokenizations = [
+    "charabia/chinese",
+    "charabia/hebrew",
+    "charabia/japanese",
+    "charabia/thai",
+    "charabia/korean",
+    "charabia/greek",
+    "charabia/khmer",
+]

 # Use POSIX semaphores instead of SysV semaphores in LMDB
 # For more information on this feature, see heed's Cargo.toml
@ -106,3 +134,6 @@ thai = ["charabia/thai"]

 # allow greek specialized tokenization
 greek = ["charabia/greek"]
+
+# allow khmer specialized tokenization
+khmer = ["charabia/khmer"]
--- a/milli/examples/search.rs
+++ b/milli/examples/search.rs
@ -5,8 +5,8 @@ use std::time::Instant;

 use heed::EnvOpenOptions;
 use milli::{
-    execute_search, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, SearchLogger,
-    TermsMatchingStrategy,
+    execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
+    SearchLogger, TermsMatchingStrategy,
 };

 #[global_allocator]
@ -49,14 +49,15 @@ fn main() -> Result<(), Box<dyn Error>> {
            let start = Instant::now();

            let mut ctx = SearchContext::new(&index, &txn);
+            let universe = filtered_universe(&ctx, &None)?;
+
            let docs = execute_search(
                &mut ctx,
-                &(!query.trim().is_empty()).then(|| query.trim().to_owned()),
-                &None,
+                (!query.trim().is_empty()).then(|| query.trim()),
                TermsMatchingStrategy::Last,
                milli::score_details::ScoringStrategy::Skip,
                false,
-                &None,
+                universe,
                &None,
                GeoSortStrategy::default(),
                0,
--- a/milli/src/distance.rs
+++ b/milli/src/distance.rs
@ -1,41 +0,0 @@
-use std::ops;
-
-use instant_distance::Point;
-use serde::{Deserialize, Serialize};
-
-use crate::normalize_vector;
-
-#[derive(Debug, Default, Clone, Serialize, Deserialize)]
-pub struct NDotProductPoint(Vec<f32>);
-
-impl NDotProductPoint {
-    pub fn new(point: Vec<f32>) -> Self {
-        NDotProductPoint(normalize_vector(point))
-    }
-
-    pub fn into_inner(self) -> Vec<f32> {
-        self.0
-    }
-}
-
-impl ops::Deref for NDotProductPoint {
-    type Target = [f32];
-
-    fn deref(&self) -> &Self::Target {
-        self.0.as_slice()
-    }
-}
-
-impl Point for NDotProductPoint {
-    fn distance(&self, other: &Self) -> f32 {
-        let dist = 1.0 - dot_product_similarity(&self.0, &other.0);
-        debug_assert!(!dist.is_nan());
-        dist
-    }
-}
-
-/// Returns the dot product similarity score that will between 0.0 and 1.0
-/// if both vectors are normalized. The higher the more similar the vectors are.
-pub fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
-    a.iter().zip(b).map(|(a, b)| a * b).sum()
-}
--- a/milli/src/documents/enriched.rs
+++ b/milli/src/documents/enriched.rs
@ -1,4 +1,5 @@
 use std::fs::File;
+use std::io::BufReader;
 use std::{io, str};

 use obkv::KvReader;
@ -19,14 +20,14 @@ use crate::FieldId;
 pub struct EnrichedDocumentsBatchReader<R> {
    documents: DocumentsBatchReader<R>,
    primary_key: String,
-    external_ids: grenad::ReaderCursor<File>,
+    external_ids: grenad::ReaderCursor<BufReader<File>>,
 }

 impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
    pub fn new(
        documents: DocumentsBatchReader<R>,
        primary_key: String,
-        external_ids: grenad::Reader<File>,
+        external_ids: grenad::Reader<BufReader<File>>,
    ) -> Result<Self, Error> {
        if documents.documents_count() as u64 == external_ids.len() {
            Ok(EnrichedDocumentsBatchReader {
@ -75,7 +76,7 @@ pub struct EnrichedDocument<'a> {
 pub struct EnrichedDocumentsBatchCursor<R> {
    documents: DocumentsBatchCursor<R>,
    primary_key: String,
-    external_ids: grenad::ReaderCursor<File>,
+    external_ids: grenad::ReaderCursor<BufReader<File>>,
 }

 impl<R> EnrichedDocumentsBatchCursor<R> {
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@ -1,5 +1,6 @@
 mod builder;
 mod enriched;
+mod primary_key;
 mod reader;
 mod serde_impl;

@ -11,6 +12,7 @@ use bimap::BiHashMap;
 pub use builder::DocumentsBatchBuilder;
 pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
 use obkv::KvReader;
+pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
 pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
 use serde::{Deserialize, Serialize};

@ -87,6 +89,12 @@ impl DocumentsBatchIndex {
    }
 }

+impl FieldIdMapper for DocumentsBatchIndex {
+    /// Maps a field name to its [`FieldId`] by delegating to `self.id(name)`.
+    // NOTE(review): this presumably resolves to an inherent `DocumentsBatchIndex::id`
+    // method (inherent methods take precedence over trait methods). If no such inherent
+    // method exists, this call would recurse into itself — confirm.
+    fn id(&self, name: &str) -> Option<FieldId> {
+        self.id(name)
+    }
+}
+
 #[derive(Debug, thiserror::Error)]
 pub enum Error {
    #[error("Error parsing number {value:?} at line {line}: {error}")]
--- a/milli/src/documents/primary_key.rs
+++ b/milli/src/documents/primary_key.rs
@ -0,0 +1,172 @@
+use std::iter;
+use std::result::Result as StdResult;
+
+use serde_json::Value;
+
+use crate::{FieldId, InternalError, Object, Result, UserError};
+
+/// The symbol used to define levels in a nested primary key.
+const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
+
+/// The default primary key name that is used when none is specified.
+pub const DEFAULT_PRIMARY_KEY: &str = "id";
+
+/// Trait for objects that can map the name of a field to its [`FieldId`].
+pub trait FieldIdMapper {
+    /// Attempts to map the passed name to its [`FieldId`].
+    ///
+    /// `None` if the field with this name was not found.
+    fn id(&self, name: &str) -> Option<FieldId>;
+}
+
+/// Represents the kind of primary key that has been set for this index:
+/// a classic flat one or a nested one.
+#[derive(Debug, Clone, Copy)]
+pub enum PrimaryKey<'a> {
+    /// A primary key whose name does not contain the level separator.
+    /// It stores the key name together with the [`FieldId`] it was resolved to.
+    Flat { name: &'a str, field_id: FieldId },
+    /// A primary key whose name contains the level separator, i.e. a path into
+    /// nested objects. Only the full path is stored; no field id is resolved upfront.
+    Nested { name: &'a str },
+}
+
+/// The reasons why a document id could not be extracted from a document
+/// with [`PrimaryKey::document_id`].
+pub enum DocumentIdExtractionError {
+    /// A value was found for the primary key but it failed validation;
+    /// carries the [`UserError`] produced by the validation.
+    InvalidDocumentId(UserError),
+    /// No value was found for the primary key in the document.
+    MissingDocumentId,
+    /// A nested primary key matched several values in the document;
+    /// carries the number of values collected when the extraction stopped.
+    TooManyDocumentIds(usize),
+}
+
+impl<'a> PrimaryKey<'a> {
+    /// Builds the primary key description for `path`.
+    ///
+    /// A path containing the level separator is nested and needs no field lookup.
+    /// Any other path is flat and must be known to `fields`: `None` is returned
+    /// when `fields` has no id for it.
+    pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
+        if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
+            return Some(Self::Nested { name: path });
+        }
+
+        fields.id(path).map(|field_id| Self::Flat { name: path, field_id })
+    }
+
+    /// Returns the name (or full nested path) of the primary key.
+    pub fn name(&self) -> &str {
+        match *self {
+            PrimaryKey::Flat { name, .. } | PrimaryKey::Nested { name } => name,
+        }
+    }
+
+    /// Extracts and validates the document id of `document`.
+    ///
+    /// The outer `Result` reports internal failures (e.g. invalid JSON stored in the
+    /// document), the inner one reports why no valid id could be extracted.
+    pub fn document_id(
+        &self,
+        document: &obkv::KvReader<FieldId>,
+        fields: &impl FieldIdMapper,
+    ) -> Result<StdResult<String, DocumentIdExtractionError>> {
+        // First locate the raw JSON value holding the id, then validate it once below.
+        let candidate: Value = match self {
+            PrimaryKey::Flat { field_id, .. } => match document.get(*field_id) {
+                Some(raw) => serde_json::from_slice(raw).map_err(InternalError::SerdeJson)?,
+                None => return Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
+            },
+            nested @ PrimaryKey::Nested { .. } => {
+                let mut candidates = Vec::new();
+                for (first_level_name, remaining_path) in nested.possible_level_names() {
+                    let value_bytes =
+                        match fields.id(first_level_name).and_then(|id| document.get(id)) {
+                            Some(bytes) => bytes,
+                            None => continue,
+                        };
+                    let object =
+                        serde_json::from_slice(value_bytes).map_err(InternalError::SerdeJson)?;
+                    fetch_matching_values(object, remaining_path, &mut candidates);
+
+                    // A primary key must designate a single value.
+                    if candidates.len() >= 2 {
+                        return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
+                            candidates.len(),
+                        )));
+                    }
+                }
+
+                match candidates.pop() {
+                    Some(value) => value,
+                    None => return Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
+                }
+            }
+        };
+
+        let validated = validate_document_id_value(candidate)?;
+        Ok(validated.map_err(DocumentIdExtractionError::InvalidDocumentId))
+    }
+
+    /// Returns an `Iterator` over every way of splitting the primary key name into a
+    /// first-level field name and the remaining path inside that field, one split per
+    /// separator, followed by the whole name paired with an empty remaining path.
+    pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
+        let name = self.name();
+        let separator_len = PRIMARY_KEY_SPLIT_SYMBOL.len_utf8();
+        let splits = name
+            .match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
+            .map(move |(index, _)| (&name[..index], &name[index + separator_len..]));
+
+        splits.chain(iter::once((name, "")))
+    }
+}
+
+/// Collects into `output` the values of `value` designated by `selector`.
+///
+/// Objects are searched key by key; any other value is pushed as is.
+fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
+    if let Value::Object(object) = value {
+        fetch_matching_values_in_object(object, selector, "", output);
+    } else {
+        output.push(value);
+    }
+}
+
+/// Walks `object` and pushes into `output` every non-object value whose full key
+/// (the `base_key` prefix joined to the entry key with the level separator) is
+/// compatible with `selector`. Nested objects with a compatible key are explored
+/// recursively.
+fn fetch_matching_values_in_object(
+    object: Object,
+    selector: &str,
+    base_key: &str,
+    output: &mut Vec<Value>,
+) {
+    for (key, value) in object {
+        // Full path of this entry from the root object.
+        let nested_key = match base_key {
+            "" => key.to_string(),
+            parent => format!("{}{}{}", parent, PRIMARY_KEY_SPLIT_SYMBOL, key),
+        };
+
+        if !starts_with(selector, &nested_key) {
+            continue;
+        }
+
+        if let Value::Object(child) = value {
+            fetch_matching_values_in_object(child, selector, &nested_key, output);
+        } else {
+            output.push(value);
+        }
+    }
+}
+
+/// Returns `true` when `key` is a prefix of `selector` that stops on a level boundary:
+/// either `key` is the whole selector or the next character is the level separator.
+fn starts_with(selector: &str, key: &str) -> bool {
+    match selector.strip_prefix(key) {
+        Some(tail) => tail.is_empty() || tail.starts_with(PRIMARY_KEY_SPLIT_SYMBOL),
+        None => false,
+    }
+}
+
+// FIXME: move to a DocumentId struct
+
+/// Returns `document_id` unchanged when it is non-empty and only made of ASCII
+/// alphanumeric characters, hyphens (`-`) and underscores (`_`); `None` otherwise.
+fn validate_document_id(document_id: &str) -> Option<&str> {
+    let is_allowed = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';
+
+    if document_id.is_empty() || !document_id.chars().all(is_allowed) {
+        return None;
+    }
+
+    Some(document_id)
+}
+
+/// Validates a JSON value used as a document id and turns it into its string form.
+///
+/// - Strings are accepted when they pass `validate_document_id`.
+/// - Numbers are accepted when they are representable as an `i64`.
+/// - Anything else is rejected with `UserError::InvalidDocumentId`, which carries
+///   the offending value.
+///
+/// The outer `Result` is always `Ok` here; rejections are reported in the inner one.
+pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
+    match document_id {
+        Value::String(string) => match validate_document_id(&string) {
+            // `validate_document_id` hands back its input unchanged, so the owned string
+            // can be returned as is, without comparing lengths or reallocating.
+            Some(_) => Ok(Ok(string)),
+            None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
+        },
+        Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
+        content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
+    }
+}
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@ -61,6 +61,10 @@ pub enum InternalError {
    AbortedIndexation,
    #[error("The matching words list contains at least one invalid member.")]
    InvalidMatchingWords,
+    #[error(transparent)]
+    ArroyError(#[from] arroy::Error),
+    #[error(transparent)]
+    VectorEmbeddingError(#[from] crate::vector::Error),
 }

 #[derive(Error, Debug)]
@ -89,8 +93,6 @@ pub enum FieldIdMapMissingEntry {

 #[derive(Error, Debug)]
 pub enum UserError {
-    #[error("A soft deleted internal document id have been used: `{document_id}`.")]
-    AccessingSoftDeletedDocument { document_id: DocumentId },
    #[error("A document cannot contain more than 65,535 fields.")]
    AttributeLimitReached,
    #[error(transparent)]
@ -112,8 +114,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    InvalidGeoField(#[from] GeoError),
    #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
    InvalidVectorDimensions { expected: usize, found: usize },
-    #[error("The `_vectors` field in the document with the id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
-    InvalidVectorsType { document_id: Value, value: Value },
+    #[error("The `_vectors.{subfield}` field in the document with id: `{document_id}` is not an array. Was expecting an array of floats or an array of arrays of floats but instead got `{value}`.")]
+    InvalidVectorsType { document_id: Value, value: Value, subfield: String },
+    #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
+    InvalidVectorsMapType { document_id: Value, value: Value },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@ -154,7 +158,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        valid_fields: BTreeSet<String>,
        hidden_fields: bool,
    },
-    #[error("{}", HeedError::BadOpenOptions)]
+    #[error("an environment is already opened with different options")]
    InvalidLmdbOpenOptions,
    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
    SortRankingRuleMissing,
@ -182,6 +186,76 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    UnknownInternalDocumentId { document_id: DocumentId },
    #[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")]
    InvalidMinTypoWordLenSetting(u8, u8),
+    #[error(transparent)]
+    VectorEmbeddingError(#[from] crate::vector::Error),
+    #[error(transparent)]
+    MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
+    #[error(transparent)]
+    InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
+    #[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
+    InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
+    #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
+    TooManyEmbedders(usize),
+    #[error("Cannot find embedder with name {0}.")]
+    InvalidEmbedder(String),
+    #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
+    TooManyVectors(String, usize),
+    #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
+        allowed_sources_for_field
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", "),
+        allowed_fields_for_source
+         .iter()
+         .map(|accepted| format!("`{}`", accepted))
+         .collect::<Vec<String>>()
+         .join(", ")
+    )]
+    InvalidFieldForSource {
+        embedder_name: String,
+        source_: crate::vector::settings::EmbedderSource,
+        field: &'static str,
+        allowed_fields_for_source: &'static [&'static str],
+        allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
+    },
+    #[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
+    InvalidOpenAiModel { embedder_name: String, model: String },
+    #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
+    MissingFieldForSource {
+        field: &'static str,
+        source_: crate::vector::settings::EmbedderSource,
+        embedder_name: String,
+    },
+}
+
+impl From<crate::vector::Error> for Error {
+    /// Routes a vector error according to its fault source: faults attributed to the
+    /// user become `Error::UserError`, every other fault becomes `Error::InternalError`.
+    fn from(value: crate::vector::Error) -> Self {
+        match value.fault() {
+            FaultSource::User => Error::UserError(value.into()),
+            FaultSource::Runtime | FaultSource::Bug | FaultSource::Undecided => {
+                Error::InternalError(value.into())
+            }
+        }
+    }
+}
+
+impl From<arroy::Error> for Error {
+    /// Converts an `arroy` error into the crate error type.
+    fn from(value: arroy::Error) -> Self {
+        match value {
+            // Wrapped heed and I/O errors go through their own conversions into `Error`.
+            arroy::Error::Heed(heed) => heed.into(),
+            arroy::Error::Io(io) => io.into(),
+            // A dimension mismatch is reported as a user error.
+            arroy::Error::InvalidVecDimension { expected, received } => {
+                Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
+            }
+            // Every remaining variant is kept as is inside an internal error.
+            arroy::Error::DatabaseFull
+            | arroy::Error::InvalidItemAppend
+            | arroy::Error::UnmatchingDistance { .. }
+            | arroy::Error::MissingMetadata => {
+                Error::InternalError(InternalError::ArroyError(value))
+            }
+        }
+    }
 }

 #[derive(Error, Debug)]
@ -328,15 +402,36 @@ impl From<HeedError> for Error {
            HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
            HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
            HeedError::Mdb(error) => InternalError(Store(error)),
-            HeedError::Encoding => InternalError(Serialization(Encoding { db_name: None })),
-            HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
+            // TODO use the encoding
+            HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
+            HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
            HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
            HeedError::DatabaseClosing => InternalError(DatabaseClosing),
-            HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
+            HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
        }
    }
 }

+/// Where the responsibility for an error lies.
+///
+/// Displayed as `user error`, `runtime error`, `coding error` and `error` respectively.
+#[derive(Debug, Clone, Copy)]
+pub enum FaultSource {
+    User,
+    Runtime,
+    Bug,
+    Undecided,
+}
+
+impl std::fmt::Display for FaultSource {
+    /// Writes the short human-readable label of the fault source.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self {
+            FaultSource::User => "user error",
+            FaultSource::Runtime => "runtime error",
+            FaultSource::Bug => "coding error",
+            FaultSource::Undecided => "error",
+        })
+    }
+}
+
 #[test]
 fn conditionally_lookup_for_error_message() {
    let prefix = "Attribute `name` is not sortable.";
--- a/milli/src/external_documents_ids.rs
+++ b/milli/src/external_documents_ids.rs
@ -1,159 +1,75 @@
-use std::borrow::Cow;
 use std::collections::HashMap;
-use std::convert::TryInto;
-use std::{fmt, str};

-use fst::map::IndexedValue;
-use fst::{IntoStreamer, Streamer};
-use roaring::RoaringBitmap;
+use heed::types::Str;
+use heed::{Database, RoIter, RoTxn, RwTxn};

-const DELETED_ID: u64 = u64::MAX;
+use crate::{DocumentId, BEU32};

-pub struct ExternalDocumentsIds<'a> {
-    pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
-    pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
-    soft_deleted_docids: RoaringBitmap,
+pub enum DocumentOperationKind {
+    Create,
+    Delete,
 }

-impl<'a> ExternalDocumentsIds<'a> {
-    pub fn new(
-        hard: fst::Map<Cow<'a, [u8]>>,
-        soft: fst::Map<Cow<'a, [u8]>>,
-        soft_deleted_docids: RoaringBitmap,
-    ) -> ExternalDocumentsIds<'a> {
-        ExternalDocumentsIds { hard, soft, soft_deleted_docids }
-    }
+pub struct DocumentOperation {
+    pub external_id: String,
+    pub internal_id: DocumentId,
+    pub kind: DocumentOperationKind,
+}

-    pub fn into_static(self) -> ExternalDocumentsIds<'static> {
-        ExternalDocumentsIds {
-            hard: self.hard.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
-            soft: self.soft.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
-            soft_deleted_docids: self.soft_deleted_docids,
-        }
+pub struct ExternalDocumentsIds(Database<Str, BEU32>);
+
+impl ExternalDocumentsIds {
+    pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
+        ExternalDocumentsIds(db)
    }

    /// Returns `true` if hard and soft external documents lists are empty.
-    pub fn is_empty(&self) -> bool {
-        self.hard.is_empty() && self.soft.is_empty()
+    pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
+        self.0.is_empty(rtxn).map_err(Into::into)
    }

-    pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
-        let external_id = external_id.as_ref();
-        match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
-            Some(id) if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) => {
-                Some(id.try_into().unwrap())
-            }
-            _otherwise => None,
-        }
-    }
-
-    /// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
-    /// don't contain any soft deleted document id.
-    pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
-        let mut new_hard_builder = fst::MapBuilder::memory();
-
-        let union_op = self.hard.op().add(&self.soft).r#union();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, docids)) = iter.next() {
-            // prefer selecting the ids from soft, always
-            let id = indexed_last_value(docids).unwrap();
-            if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
-                new_hard_builder.insert(external_id, id)?;
-            }
-        }
-        drop(iter);
-
-        // Delete soft map completely
-        self.soft = fst::Map::default().map_data(Cow::Owned)?;
-        // We save the new map as the new hard map.
-        self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
-
-        Ok(())
-    }
-
-    pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
-        let union_op = self.soft.op().add(other).r#union();
-
-        let mut new_soft_builder = fst::MapBuilder::memory();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let id = indexed_last_value(marked_docids).unwrap();
-            new_soft_builder.insert(external_id, id)?;
-        }
-
-        drop(iter);
-
-        // We save the new map as the new soft map.
-        self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
-        self.merge_soft_into_hard()
+    pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
+        self.0.get(rtxn, external_id.as_ref())
    }

    /// An helper function to debug this type, returns an `HashMap` of both,
    /// soft and hard fst maps, combined.
-    pub fn to_hash_map(&self) -> HashMap<String, u32> {
-        let mut map = HashMap::new();
-
-        let union_op = self.hard.op().add(&self.soft).r#union();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let id = indexed_last_value(marked_docids).unwrap();
-            if id != DELETED_ID {
-                let external_id = str::from_utf8(external_id).unwrap();
-                map.insert(external_id.to_owned(), id.try_into().unwrap());
-            }
+    pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
+        let mut map = HashMap::default();
+        for result in self.0.iter(rtxn)? {
+            let (external, internal) = result?;
+            map.insert(external.to_owned(), internal);
        }
-
-        map
+        Ok(map)
    }

-    /// Return an fst of the combined hard and soft deleted ID.
-    pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
-        if self.soft.is_empty() {
-            return Ok(Cow::Borrowed(&self.hard));
-        }
-        let union_op = self.hard.op().add(&self.soft).r#union();
-
-        let mut iter = union_op.into_stream();
-        let mut new_hard_builder = fst::MapBuilder::memory();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let value = indexed_last_value(marked_docids).unwrap();
-            if value != DELETED_ID {
-                new_hard_builder.insert(external_id, value)?;
+    /// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
+    ///
+    /// If the list contains multiple operations on the same external id, then the result is unspecified.
+    ///
+    /// # Panics
+    ///
+    /// - If attempting to delete a document that doesn't exist.
+    ///   (Creating an external id that already exists does not panic: it is written with `put`.)
+    pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
+        for DocumentOperation { external_id, internal_id, kind } in operations {
+            match kind {
+                DocumentOperationKind::Create => {
+                    self.0.put(wtxn, &external_id, &internal_id)?;
+                }
+                DocumentOperationKind::Delete => {
+                    if !self.0.delete(wtxn, &external_id)? {
+                        panic!("Attempting to delete a non-existing document")
+                    }
+                }
            }
        }

-        drop(iter);
-
-        Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
-    }
-
-    fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
-        if self.soft.len() >= self.hard.len() / 2 {
-            self.hard = self.to_fst()?.into_owned();
-            self.soft = fst::Map::default().map_data(Cow::Owned)?;
-        }
-
        Ok(())
    }
-}

-impl fmt::Debug for ExternalDocumentsIds<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
+    /// Returns an iterator over all the external ids.
+    pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, BEU32>> {
+        self.0.iter(rtxn)
    }
 }
-
-impl Default for ExternalDocumentsIds<'static> {
-    fn default() -> Self {
-        ExternalDocumentsIds {
-            hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
-            soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
-            soft_deleted_docids: RoaringBitmap::new(),
-        }
-    }
-}
-
-/// Returns the value of the `IndexedValue` with the highest _index_.
-fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
-    indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
-}
--- a/milli/src/fields_ids_map.rs
+++ b/milli/src/fields_ids_map.rs
@ -81,6 +81,12 @@ impl Default for FieldsIdsMap {
    }
 }

+impl crate::documents::FieldIdMapper for FieldsIdsMap {
+    /// Maps a field name to its [`FieldId`] by delegating to `self.id(name)`.
+    // NOTE(review): this presumably resolves to an inherent `FieldsIdsMap::id` method
+    // (inherent methods take precedence over trait methods). If no such inherent
+    // method exists, this call would recurse into itself — confirm.
+    fn id(&self, name: &str) -> Option<FieldId> {
+        self.id(name)
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/milli/src/heed_codec/beu16_str_codec.rs
+++ b/milli/src/heed_codec/beu16_str_codec.rs
@ -2,26 +2,28 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::str;

+use heed::BoxedError;
+
 pub struct BEU16StrCodec;

 impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
    type DItem = (u16, &'a str);

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (n_bytes, str_bytes) = bytes.split_at(2);
-        let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
-        let s = str::from_utf8(str_bytes).ok()?;
-        Some((n, s))
+        let n = n_bytes.try_into().map(u16::from_be_bytes)?;
+        let s = str::from_utf8(str_bytes)?;
+        Ok((n, s))
    }
 }

 impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
    type EItem = (u16, &'a str);

-    fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(s.len() + 2);
        bytes.extend_from_slice(&n.to_be_bytes());
        bytes.extend_from_slice(s.as_bytes());
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
    }
 }
--- a/milli/src/heed_codec/beu32_str_codec.rs
+++ b/milli/src/heed_codec/beu32_str_codec.rs
@ -2,26 +2,28 @@ use std::borrow::Cow;
 use std::convert::TryInto;
 use std::str;

+use heed::BoxedError;
+
 pub struct BEU32StrCodec;

 impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
    type DItem = (u32, &'a str);

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (n_bytes, str_bytes) = bytes.split_at(4);
-        let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
-        let s = str::from_utf8(str_bytes).ok()?;
-        Some((n, s))
+        let n = n_bytes.try_into().map(u32::from_be_bytes)?;
+        let s = str::from_utf8(str_bytes)?;
+        Ok((n, s))
    }
 }

 impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
    type EItem = (u32, &'a str);

-    fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(s.len() + 4);
        bytes.extend_from_slice(&n.to_be_bytes());
        bytes.extend_from_slice(s.as_bytes());
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
    }
 }
--- a/milli/src/heed_codec/byte_slice_ref.rs
+++ b/milli/src/heed_codec/byte_slice_ref.rs
@ -1,23 +1,23 @@
 use std::borrow::Cow;

-use heed::{BytesDecode, BytesEncode};
+use heed::{BoxedError, BytesDecode, BytesEncode};

-/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
+/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
 /// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
-pub struct ByteSliceRefCodec;
+pub struct BytesRefCodec;

-impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
+impl<'a> BytesEncode<'a> for BytesRefCodec {
    type EItem = &'a [u8];

-    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
-        Some(Cow::Borrowed(item))
+    fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
+        Ok(Cow::Borrowed(item))
    }
 }

-impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
+impl<'a> BytesDecode<'a> for BytesRefCodec {
    type DItem = &'a [u8];

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        Some(bytes)
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        Ok(bytes)
    }
 }
--- a/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs
+++ b/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs
@ -1,8 +1,9 @@
 use std::borrow::Cow;
 use std::marker::PhantomData;

-use heed::{BytesDecode, BytesEncode};
+use heed::{BoxedError, BytesDecode, BytesEncode};

+use crate::heed_codec::SliceTooShortError;
 use crate::{try_split_array_at, DocumentId, FieldId};

 pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
@ -13,16 +14,16 @@ where
 {
    type DItem = (FieldId, DocumentId, C::DItem);

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
        let field_id = u16::from_be_bytes(field_id_bytes);

-        let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
+        let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
        let document_id = u32::from_be_bytes(document_id_bytes);

        let value = C::bytes_decode(bytes)?;

-        Some((field_id, document_id, value))
+        Ok((field_id, document_id, value))
    }
 }

@ -32,13 +33,15 @@ where
 {
    type EItem = (FieldId, DocumentId, C::EItem);

-    fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
+    fn bytes_encode(
+        (field_id, document_id, value): &'a Self::EItem,
+    ) -> Result<Cow<[u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(32);
        bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
        bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
        let value_bytes = C::bytes_encode(value)?;
        // variable length, if f64 -> 16 bytes, if string -> large, potentially
        bytes.extend_from_slice(&value_bytes);
-        Some(Cow::Owned(bytes))
+        Ok(Cow::Owned(bytes))
    }
 }
--- a/milli/src/heed_codec/facet/mod.rs
+++ b/milli/src/heed_codec/facet/mod.rs
@ -5,8 +5,8 @@ use std::borrow::Cow;
 use std::convert::TryFrom;
 use std::marker::PhantomData;

-use heed::types::{DecodeIgnore, OwnedType};
-use heed::{BytesDecode, BytesEncode};
+use heed::types::DecodeIgnore;
+use heed::{BoxedError, BytesDecode, BytesEncode};
 use roaring::RoaringBitmap;

 pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
 pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
 pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;

-pub type FieldIdCodec = OwnedType<BEU16>;
+pub type FieldIdCodec = BEU16;

 /// Tries to split a slice in half at the given middle point,
 /// `None` if the slice is too short.
@ -58,7 +58,7 @@ where
 {
    type EItem = FacetGroupKey<T::EItem>;

-    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
+    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut v = vec![];
        v.extend_from_slice(&value.field_id.to_be_bytes());
        v.extend_from_slice(&[value.level]);
@ -66,7 +66,7 @@ where
        let bound = T::bytes_encode(&value.left_bound)?;
        v.extend_from_slice(&bound);

-        Some(Cow::Owned(v))
+        Ok(Cow::Owned(v))
    }
 }
 impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
@ -75,11 +75,11 @@ where
 {
    type DItem = FacetGroupKey<T::DItem>;

-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
-        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
+        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
        let level = bytes[2];
        let bound = T::bytes_decode(&bytes[3..])?;
-        Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
+        Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
    }
 }

@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
 impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
    type EItem = FacetGroupValue;

-    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
+    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut v = vec![value.size];
        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
-        Some(Cow::Owned(v))
+        Ok(Cow::Owned(v))
    }
 }
 impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;
-    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let size = bytes[0];
-        let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
-        Some(FacetGroupValue { size, bitmap })
+        let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
+        Ok(FacetGroupValue { size, bitmap })
    }
 }
--- a/Show More
+++ b/Show More