Compare commits


87 Commits

Author SHA1 Message Date
Clément Renault
f82ab3cc50 Experiments with Quentin 2024-07-15 16:20:02 +02:00
meili-bors[bot]
b64b4ab6ca Merge #4762
4762: Add search benchmarks r=Kerollmops a=dureuill

# Pull Request

## What does this PR do?
- [x] Modifies `xtask bench` so that workloads support an optional `target` argument. `target` defaults to `indexing::=trace` (a `tracing` filter expression; see the sketch after this list)
- [x] Refactor the spans in the search to offer finer profiling granularity
- [x] Add search workloads  
- [x] Updates documentation in `BENCHMARKS.md`
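For context, the `target` value is a `tracing` filter expression parsed with `tracing_subscriber`'s `Targets` type (which the updated `BENCHMARKS.md` further down links to). A minimal, hypothetical sketch of parsing such expressions; the exact wiring inside `xtask bench` may differ:

```rust
use tracing_subscriber::filter::Targets;

fn main() {
    // Default profiling filter for indexing workloads.
    let indexing: Targets = "indexing::=trace".parse().expect("invalid filter");
    // The override used by the new search workloads.
    let search: Targets = "search::=trace".parse().expect("invalid filter");
    println!("indexing: {indexing:?}\nsearch: {search:?}");
}
```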


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-07-03 08:39:29 +00:00
Louis Dureuil
427861b323 Update documentation in BENCHMARKS.md 2024-07-02 16:13:54 +02:00
Louis Dureuil
d29cb75061 Add search workloads 2024-07-02 16:13:54 +02:00
Louis Dureuil
128e6c7502 Search: spans with a finer granularity 2024-07-02 16:13:53 +02:00
Louis Dureuil
3129f96603 xtask bench: Add support for overriding the profiling target 2024-07-02 16:12:50 +02:00
meili-bors[bot]
c701d89fdc Merge #4754
4754: bring back v1.9.0 changes to main r=irevoire a=ManyTheFish



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-07-02 13:30:50 +00:00
Tamo
3d9befd64f fix warning 2024-07-02 15:30:16 +02:00
Tamo
ee14d5196c fix the tests 2024-07-02 15:18:30 +02:00
Tamo
d96372b9c4 Merge branch 'main' into tmp-release-v1.9.0 2024-07-02 14:48:50 +02:00
meili-bors[bot]
ea67816a21 Merge #4758
4758: Bump docker/build-push-action from 5 to 6 r=curquiza a=dependabot[bot]

Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 5 to 6.
<details>
<summary>Release notes</summary>
<p><em>Sourced from <a href="https://github.com/docker/build-push-action/releases">docker/build-push-action's releases</a>.</em></p>
<blockquote>
<h2>v6.0.0</h2>
<ul>
<li>Export build record and generate <a href="https://docs.docker.com/build/ci/github-actions/build-summary/">build summary</a> by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/1120">docker/build-push-action#1120</a></li>
<li>Bump <code>@docker/actions-toolkit</code> from 0.24.0 to 0.26.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1132">docker/build-push-action#1132</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1136">docker/build-push-action#1136</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1138">docker/build-push-action#1138</a></li>
<li>Bump braces from 3.0.2 to 3.0.3 in <a href="https://redirect.github.com/docker/build-push-action/pull/1137">docker/build-push-action#1137</a></li>
</ul>
<blockquote>
<p>[!NOTE]
This major release adds support for generating <a href="https://docs.docker.com/build/ci/github-actions/build-summary/">Build summary</a> and exporting build record for your build. You can disable this feature by setting <a href="https://docs.docker.com/build/ci/github-actions/build-summary/#disable-job-summary"> <code>DOCKER_BUILD_NO_SUMMARY: true</code> environment variable in your workflow</a>.</p>
</blockquote>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.4.0...v6.0.0">https://github.com/docker/build-push-action/compare/v5.4.0...v6.0.0</a></p>
<h2>v5.4.0</h2>
<ul>
<li>Show builder information before building by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/1128">docker/build-push-action#1128</a></li>
<li>Handle attestations correctly with provenance and sbom inputs by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/1086">docker/build-push-action#1086</a></li>
<li>Bump <code>@docker/actions-toolkit</code> from 0.19.0 to 0.24.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1088">docker/build-push-action#1088</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1105">docker/build-push-action#1105</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1121">docker/build-push-action#1121</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1127">docker/build-push-action#1127</a></li>
<li>Bump undici from 5.28.3 to 5.28.4 in <a href="https://redirect.github.com/docker/build-push-action/pull/1090">docker/build-push-action#1090</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.3.0...v5.4.0">https://github.com/docker/build-push-action/compare/v5.3.0...v5.4.0</a></p>
<h2>v5.3.0</h2>
<ul>
<li>Bump <code>@docker/actions-toolkit</code> from 0.18.0 to 0.19.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1080">docker/build-push-action#1080</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.2.0...v5.3.0">https://github.com/docker/build-push-action/compare/v5.2.0...v5.3.0</a></p>
<h2>v5.2.0</h2>
<ul>
<li>Disable quotes detection for <code>outputs</code> input by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/1074">docker/build-push-action#1074</a></li>
<li>Warn about ignored inputs by <a href="https://github.com/favonia"><code>@favonia</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/1019">docker/build-push-action#1019</a></li>
<li>Bump <code>@docker/actions-toolkit</code> from 0.14.0 to 0.18.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/1070">docker/build-push-action#1070</a></li>
<li>Bump undici from 5.26.3 to 5.28.3 in <a href="https://redirect.github.com/docker/build-push-action/pull/1057">docker/build-push-action#1057</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.1.0...v5.2.0">https://github.com/docker/build-push-action/compare/v5.1.0...v5.2.0</a></p>
<h2>v5.1.0</h2>
<ul>
<li>Add <code>annotations</code> input by <a href="https://github.com/crazy-max"><code>@crazy-max</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/992">docker/build-push-action#992</a></li>
<li>Add <code>secret-envs</code> input by <a href="https://github.com/elias-lundgren"><code>@elias-lundgren</code></a> in <a href="https://redirect.github.com/docker/build-push-action/pull/980">docker/build-push-action#980</a></li>
<li>Bump <code>@babel/traverse</code> from 7.17.3 to 7.23.2 in <a href="https://redirect.github.com/docker/build-push-action/pull/991">docker/build-push-action#991</a></li>
<li>Bump <code>@docker/actions-toolkit</code> from 0.13.0-rc.1 to 0.14.0 in <a href="https://redirect.github.com/docker/build-push-action/pull/990">docker/build-push-action#990</a> <a href="https://redirect.github.com/docker/build-push-action/pull/1006">docker/build-push-action#1006</a></li>
</ul>
<p><strong>Full Changelog</strong>: <a href="https://github.com/docker/build-push-action/compare/v5.0.0...v5.1.0">https://github.com/docker/build-push-action/compare/v5.0.0...v5.1.0</a></p>
</blockquote>
</details>
<details>
<summary>Commits</summary>
<ul>
<li><a href="15560696de"><code>1556069</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1158">#1158</a> from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li><a href="57e1d34ac3"><code>57e1d34</code></a> chore: update generated content</li>
<li><a href="309982ebc9"><code>309982e</code></a> chore(deps): Bump <code>`@​docker/actions-toolkit</code>` from 0.27.0 to 0.28.0</li>
<li><a href="9476c25b2a"><code>9476c25</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1153">#1153</a> from crazy-max/export-retention</li>
<li><a href="97be5a4928"><code>97be5a4</code></a> chore: update generated content</li>
<li><a href="9cac6c8ea0"><code>9cac6c8</code></a> use default retention days for build export artifact</li>
<li><a href="31159d49c0"><code>31159d4</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1149">#1149</a> from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li><a href="07e1c3e148"><code>07e1c3e</code></a> chore: update generated content</li>
<li><a href="f7febd621d"><code>f7febd6</code></a> chore(deps): Bump <code>`@​docker/actions-toolkit</code>` from 0.26.2 to 0.27.0</li>
<li><a href="f6010ea701"><code>f6010ea</code></a> Merge pull request <a href="https://redirect.github.com/docker/build-push-action/issues/1147">#1147</a> from docker/dependabot/npm_and_yarn/docker/actions-t...</li>
<li>Additional commits viewable in <a href="https://github.com/docker/build-push-action/compare/v5...v6">compare view</a></li>
</ul>
</details>
<br />


[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=docker/build-push-action&package-manager=github_actions&previous-version=5&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

You can trigger a rebase of this PR by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

<details>
<summary>Dependabot commands and options</summary>
<br />

You can trigger Dependabot actions by commenting on this PR:
- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show <dependency name> ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)


</details>

Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2024-07-02 12:36:19 +00:00
dependabot[bot]
c885fcebcc Bump docker/build-push-action from 5 to 6
Bumps [docker/build-push-action](https://github.com/docker/build-push-action) from 5 to 6.
- [Release notes](https://github.com/docker/build-push-action/releases)
- [Commits](https://github.com/docker/build-push-action/compare/v5...v6)

---
updated-dependencies:
- dependency-name: docker/build-push-action
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-07-02 12:28:28 +00:00
meili-bors[bot]
b6e1a1f2f5 Merge #4761
4761: Add vX Docker tag when publishing Docker image r=Kerollmops a=curquiza

Following this: https://github.com/meilisearch/meilisearch/discussions/4759

Co-authored-by: Clémentine <clementine@meilisearch.com>
2024-07-02 11:11:39 +00:00
Clémentine
277f4883f6 Add vX Docker tag when publishing Docker image 2024-07-02 12:11:44 +02:00
ManyTheFish
015d90a962 merge main 2024-07-01 11:50:36 +02:00
meili-bors[bot]
0df84bbba7 Merge #4746
4746: Fix hybrid search limit offset r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4745

## What does this PR do?
- Apply offset and limit to the keyword search results when they are returned early (see the sketch after this list).
- Add a test that is initially failing, and then passes
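A minimal sketch of the idea behind the fix, with a hypothetical helper (not Meilisearch's actual code): pagination has to be applied on the early-return path too.

```rust
/// Hypothetical helper: paginate keyword hits that are returned early,
/// instead of handing them back unpaginated.
fn paginate<T>(hits: Vec<T>, offset: usize, limit: usize) -> Vec<T> {
    hits.into_iter().skip(offset).take(limit).collect()
}

fn main() {
    let hits: Vec<u32> = (0..10).collect();
    // offset=2, limit=3 must yield [2, 3, 4] even when the semantic side is skipped.
    assert_eq!(paginate(hits, 2, 3), vec![2, 3, 4]);
}
```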


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-27 12:47:08 +00:00
Louis Dureuil
e53de15b8e Fix behavior of limit and offset for hybrid search when keyword results are returned early
The test is fixed
2024-06-27 14:25:33 +02:00
Louis Dureuil
8c4921b9dd Add failing test on limit+offset for hybrid search 2024-06-27 14:21:34 +02:00
meili-bors[bot]
f6a00f4a90 Merge #4740
4740: Make `embeddings` optional and improve error message for `regenerate` r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4741

## What does this PR do?
- Make the `embeddings` parameter optional when manually specifying embeddings for an embedder (the resulting shape is sketched after this list)
- Adds a lot of tests around malformed `_vectors.embedder` objects
- Use `deserr` to deserialize the `_vectors.embedder` field, improving error messages
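A sketch of the resulting `_vectors.<embedder>` shape (field names from this PR; plain serde is used here so the example is self-contained, whereas the PR itself relies on `deserr` for its error messages):

```rust
use serde::Deserialize;

/// Sketch of a `_vectors.<embedder>` object after this PR:
/// `embeddings` may be omitted, while `regenerate` stays mandatory.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct ExplicitVectors {
    embeddings: Option<Vec<Vec<f32>>>,
    regenerate: bool,
}

fn main() {
    let ok: ExplicitVectors = serde_json::from_str(r#"{ "regenerate": true }"#).unwrap();
    assert!(ok.embeddings.is_none());
    // A missing `regenerate` is rejected, as the PR's error-message tests check.
    assert!(serde_json::from_str::<ExplicitVectors>("{}").is_err());
}
```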


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 10:06:28 +00:00
Tamo
ce08dc509b add more tests and improve the location of the error 2024-06-27 11:51:45 +02:00
Tamo
1daaed163a Make _vectors.:embedding.regenerate mandatory + tests + error messages 2024-06-27 11:04:58 +02:00
meili-bors[bot]
809e742253 Merge #4731
4731: Fix the missing geo distance when one or both of the lat / lng are string r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4193

## What does this PR do?
- Properly extract the lat / lng when one or both of them are strings (see the sketch after this list)
- Add a test 
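A minimal sketch of the extraction idea with a hypothetical helper (the real change lives in milli's geo extraction code):

```rust
use serde_json::Value;

/// Hypothetical helper: accept a coordinate given either as a JSON number
/// or as a numeric string, e.g. `{"_geo": {"lat": "45.0", "lng": 3.2}}`.
fn extract_coordinate(value: &Value) -> Option<f64> {
    match value {
        Value::Number(n) => n.as_f64(),
        Value::String(s) => s.trim().parse::<f64>().ok(),
        _ => None,
    }
}

fn main() {
    assert_eq!(extract_coordinate(&Value::from(3.2)), Some(3.2));
    assert_eq!(extract_coordinate(&Value::from("45.0")), Some(45.0));
    assert_eq!(extract_coordinate(&Value::Bool(true)), None);
}
```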


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 07:33:22 +00:00
meili-bors[bot]
decdfe03bc Merge #4724
4724: Improve tenant token error messages r=ManyTheFish a=irevoire

# Pull Request

## Related issue
Fixes  #4727

## What does this PR do?
- Introduce a bunch of new error messages around tenant tokens
- Ignore the error messages in most tests that loop over multiple kinds of errors
- Introduce new tests that specifically test these error messages


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 06:47:40 +00:00
meili-bors[bot]
aae5c324d7 Merge #4703
4703: Update yaup r=ManyTheFish a=irevoire

There was a bug in `yaup` where serializing a structure with an array would give you a wrong query parameter.

Now, yaup is also in charge of sending the initial `?` before the query parameters.
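A sketch of the new behavior, assuming yaup's serde-based `to_string` entry point (the precise array encoding is yaup's concern and is not asserted here):

```rust
#[derive(serde::Serialize)]
struct SearchQuery {
    q: String,
    #[serde(rename = "attributesToRetrieve")]
    attributes_to_retrieve: Vec<String>,
}

fn main() {
    let query = SearchQuery {
        q: "glass".into(),
        attributes_to_retrieve: vec!["title".into(), "overview".into()],
    };
    // With yaup 0.3, the serializer emits the leading `?` itself and encodes
    // each array member as its own query parameter.
    let qs = yaup::to_string(&query).unwrap();
    assert!(qs.starts_with('?'));
    assert!(qs.contains("q=glass"));
}
```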

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-27 06:10:15 +00:00
Tamo
a108d8f6f3 update yaup 2024-06-26 16:03:51 +02:00
meili-bors[bot]
34cf576339 Merge #4706
4706: specify the rust toolchain r=irevoire a=irevoire

The action we were using was not working with the `rust-toolchain.toml` file, and its repository is not maintained anymore.
While looking for a solution, I found out that [helix](https://github.com/helix-editor/rust-toolchain) solved the issue on their side by forking the repo and adding a few fixes. That's what I use currently, but I don't know whether it's a sustainable solution in the long term.

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-26 12:56:18 +00:00
Tamo
eb292a7a62 Fix the missing geo distance when one or both of the lat / lng are string 2024-06-26 14:50:15 +02:00
Tamo
e28332a904 set the rust toolchain to the v1.75.0 2024-06-26 14:01:28 +02:00
Tamo
a1dcde6b9a Update meilisearch/src/extractors/authentication/mod.rs
Co-authored-by: Many the fish <many@meilisearch.com>
2024-06-26 14:00:21 +02:00
Tamo
544e98ca99 use the current version for clippy 2024-06-26 13:58:25 +02:00
meili-bors[bot]
1e4699b82c Merge #4716
4716: Fix bad http status and error message on wrong payload  r=irevoire a=Karribalu

# Pull Request

## Related issue
Fixes #4698

## What does this PR do?
- Fixes bad http status when bad payload with gzip Content-Encoding

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: karribalu <karri.balu123456@gmail.com>
2024-06-26 08:00:51 +00:00
meili-bors[bot]
2c09c324f7 Merge #4730
4730: fix a possibly flaky test r=irevoire a=irevoire

On slow CI, it was possible for a document addition to _not_ be processed yet and then get autobatched with an index deletion, which changed the tasks' summary details in the end.
Now, I wait for the task to finish, so the result will always be the same.
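The de-flaking pattern, sketched with a hypothetical polling helper (the test suite's real helper differs):

```rust
use std::{thread, time::Duration};

/// Hypothetical helper: block until a task reaches a terminal status, so a
/// later index deletion cannot be autobatched with the pending addition.
fn wait_for_task(mut fetch_status: impl FnMut() -> String) {
    while !matches!(fetch_status().as_str(), "succeeded" | "failed" | "canceled") {
        thread::sleep(Duration::from_millis(50));
    }
}

fn main() {
    let mut polls = 0;
    wait_for_task(|| {
        polls += 1;
        if polls < 3 { "processing".to_string() } else { "succeeded".to_string() }
    });
    assert_eq!(polls, 3);
}
```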

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-26 07:32:51 +00:00
Tamo
3d6b61d8d2 fix flakyness for real 2024-06-26 09:24:09 +02:00
Tamo
1374b661d1 fix a possibly flaky test 2024-06-26 09:14:59 +02:00
meili-bors[bot]
7e3c306c54 Merge #4725
4725: Store primary key as String when Number exceeds i64 range r=irevoire a=JWSong

# Pull Request

## Related issue
Fixes #4696 

## What does this PR do?
- When a Number value exceeding the range of i64 is received as a primary key, it will be stored as a String.
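A minimal sketch of the fallback using `serde_json` (hypothetical helper; the actual change lives in milli's primary-key handling):

```rust
use serde_json::{json, Value};

/// Hypothetical helper: keep a numeric primary key as a number while it fits
/// in i64, otherwise store its string representation.
fn normalize_primary_key(value: &Value) -> Value {
    match value {
        Value::Number(n) if n.as_i64().is_none() => Value::String(n.to_string()),
        other => other.clone(),
    }
}

fn main() {
    // 2^63 exceeds i64::MAX, so it is kept as the string "9223372036854775808".
    assert_eq!(
        normalize_primary_key(&json!(9223372036854775808u64)),
        json!("9223372036854775808")
    );
    assert_eq!(normalize_primary_key(&json!(42)), json!(42));
}
```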

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: JWSong <thdwjddn123@gmail.com>
2024-06-26 07:06:04 +00:00
karribalu
2608a596a0 Update error message and add tests for incomplete compressed document 2024-06-25 18:36:29 +01:00
Tamo
e16edb2c35 use the helix action since the official one doesn't support the rust-toolchain file 2024-06-25 17:00:50 +02:00
Tamo
5c758438fc Update the CI to take the rust-toolchain file into account 2024-06-25 16:59:23 +02:00
Tamo
ab6cac2321 specify the rust toolchain 2024-06-25 16:59:23 +02:00
JWSong
6fb36ed30e get rid of the redundant info in document_addition_with_huge_int_primary_key 2024-06-25 23:54:27 +09:00
JWSong
dcdc83946f accept large number as string 2024-06-25 21:41:47 +09:00
meili-bors[bot]
3c4c46377b Merge #4665
4665: Add missing Korean support r=ManyTheFish a=junhochoi

Some configurations are missing the `korean` feature; this PR adds them and a test case in `milli/src/search/mod.rs`.

# Pull Request

## Related issue

#3443 #3882 

## What does this PR do?
- Improvement on enabling Korean support

Inspired by previous work (#3882), I tried to enable the Korean features but found some missing configurations.
This PR adds those missing configs (mostly in Cargo.toml) and one test case.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Junho Choi <jh.choi@catenoid.net>
2024-06-25 11:51:21 +00:00
Tamo
7da21bb601 introduce as many custom error message as possible 2024-06-25 12:40:51 +02:00
meili-bors[bot]
13161fd7d0 Merge #4722
4722: Grow by 1TB instead of 1MB r=dureuill a=dureuill

When an index reaches 1TB, its size is now increased by 1TB rather than 1MB.
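A sketch of the policy, using the constant visible in the test diff further down (the LMDB resize mechanics are omitted):

```rust
/// Growth step after this PR: 1 TB instead of 1 MB (decimal units, matching
/// the `index_growth_amount` value in the test diff below).
const INDEX_GROWTH_AMOUNT: u64 = 1000 * 1000 * 1000 * 1000;

/// Sketch: compute the next map size once an index's map is full.
fn next_map_size(current: u64) -> u64 {
    current.checked_add(INDEX_GROWTH_AMOUNT).expect("index map size overflowed")
}

fn main() {
    let one_tb = 1000u64 * 1000 * 1000 * 1000;
    assert_eq!(next_map_size(one_tb), 2 * one_tb);
}
```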

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-25 10:17:58 +00:00
meili-bors[bot]
b81e2951a9 Merge #4723
4723: Fixes for Rust v1.79 r=ManyTheFish a=dureuill

cherry-picked from the `release-v1.9.0` branch

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-25 09:21:29 +00:00
Louis Dureuil
d75e0098c7 Fixes for Rust v1.79 2024-06-25 11:16:06 +02:00
Louis Dureuil
27496354e2 Grow by 1TB instead of 1MB 2024-06-25 09:01:11 +02:00
Junho Choi
2e0ff56f3f Add missing Korean support
Some configurations are missing the `korean` feature; this commit adds them and
a test case in `milli/src/search/mod.rs`.
2024-06-25 12:45:21 +09:00
Tamo
a74fb87d1e start introducing new error messages 2024-06-24 19:00:53 +02:00
Tamo
558b66e535 makes most tests works with variable error messages 2024-06-24 19:00:44 +02:00
Strift
cade18bd47 Update README.md (#4721) 2024-06-24 15:47:10 +02:00
meili-bors[bot]
298c7b0c93 Merge #4715
4715: Build all arroy indexes that need to be built r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4588

## What does this PR do?
- Update arroy
- Ensure we always rebuild the arroy indexes that need to be built


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-06-24 09:32:04 +00:00
Tamo
606e108420 fix all the flaky snapshots 2024-06-24 11:13:45 +02:00
Tamo
7be17b7e4c add the missing snapshots 2024-06-24 10:52:57 +02:00
Tamo
1693332cab Update arroy and always build the tree that need to be built 2024-06-24 10:14:03 +02:00
meili-bors[bot]
ddd564665b Merge #4713
4713: Speed up facet distribution r=ManyTheFish a=Kerollmops

This PR is akin to #4682, but this time the same logic is applied to the facets. Bitmaps are not decoded; instead, we intersect the serialized bytes with the search candidates, rather than materializing a RoaringBitmap only to destroy it right after the operation.

A prospect reported some slow requests when performing facet searches, and I found out that this on-disk intersection optimization wasn't applied to the facets.
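The gist, sketched with the public `roaring` API (the real code goes further and intersects directly on the serialized bytes, skipping deserialization entirely):

```rust
use roaring::RoaringBitmap;

/// Sketch: count how many facet document ids are also search candidates
/// without materializing the intersection bitmap just to read its length.
fn facet_count(serialized_docids: &[u8], candidates: &RoaringBitmap) -> std::io::Result<u64> {
    let facet_docids = RoaringBitmap::deserialize_from(serialized_docids)?;
    Ok(candidates.intersection_len(&facet_docids))
}

fn main() -> std::io::Result<()> {
    let facet: RoaringBitmap = (0u32..100).collect();
    let mut bytes = Vec::new();
    facet.serialize_into(&mut bytes)?;
    let candidates: RoaringBitmap = (50u32..200).collect();
    assert_eq!(facet_count(&bytes, &candidates)?, 50);
    Ok(())
}
```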

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-06-24 05:23:46 +00:00
karribalu
2a38f5c757 Run Rustfmt 2024-06-21 00:14:26 +01:00
karribalu
133d33d72c Merge remote-tracking branch 'origin/main' 2024-06-20 23:55:17 +01:00
karribalu
fb683fe88b Fix bad http status and error message on wrong payload 2024-06-20 23:55:09 +01:00
meili-bors[bot]
4ae11bfd31 Merge #4710
4710: Only spawn thread pool once (v1.9) r=irevoire a=dureuill

# Pull Request

See #4707 
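A sketch of the "spawn once" pattern with a lazily initialized global rayon pool (assumed shape; the PR's actual refactor may be organized differently):

```rust
use std::sync::OnceLock;

use rayon::ThreadPool;

/// Build the pool at most once instead of on every indexing call.
static INDEXING_POOL: OnceLock<ThreadPool> = OnceLock::new();

fn indexing_pool() -> &'static ThreadPool {
    INDEXING_POOL.get_or_init(|| {
        rayon::ThreadPoolBuilder::new()
            .thread_name(|i| format!("indexing-thread-{i}"))
            .build()
            .expect("failed to build the indexing thread pool")
    })
}

fn main() {
    let sum: u64 = indexing_pool().install(|| (1..=1_000u64).sum());
    assert_eq!(sum, 500_500);
}
```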

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-06-20 11:45:32 +00:00
Clément Renault
9736e16a88 Make clippy happy 2024-06-20 13:02:44 +02:00
Clément Renault
6fa4da8ae7 Improve facet distribution speed in count mode 2024-06-20 12:58:51 +02:00
Clément Renault
19d7cdc20d Improve facet distribution speed in lexico mode 2024-06-20 12:57:08 +02:00
meili-bors[bot]
c229200820 Merge #4712
4712: Update mini-dashboard 2.14 r=irevoire a=curquiza

Fixes #4668

Co-authored-by: curquiza <clementine@meilisearch.com>
2024-06-20 08:47:22 +00:00
curquiza
bad28cc9e2 Update mini-dashboard 2.14 2024-06-20 10:01:36 +02:00
Clément Renault
534f696b29 Update the README to link more demos (#4711)
This Pull Request adds two new interesting demos to a brand new list, which replaces the short _Try it_ text just below the Where2Watch showcase image, hoping people will notice them.
2024-06-20 09:53:06 +02:00
Louis Dureuil
a04041c8f2 Only spawn the pool once 2024-06-19 16:25:33 +02:00
Clémentine
b347b66619 Revert "Add june 11th webinar banner" (#4705) 2024-06-18 18:45:50 +02:00
meili-bors[bot]
d1962b2b0f Merge #4691
4691: Add june 11th webinar banner r=curquiza a=Strift

# Pull Request

This PR adds a banner in the README to promote tomorrow's webinar event.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Strift <laurent@meilisearch.com>
2024-06-10 16:17:21 +00:00
Strift
8b450b84f8 Add june 11th webinar banner 2024-06-10 17:45:14 +02:00
meili-bors[bot]
93f5defedc Merge #4656
4656: Adding a new `searchableAttribute` no longer re-indexes all the attributes r=ManyTheFish a=Kerollmops

Fixes #4492.

## To Do
 - [x] Do not call the `InnerSettingsDiff::only_additional_fields` function too many times
 - [ ] Add tests

Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-06-05 14:51:14 +00:00
ManyTheFish
33241a6b12 Fix condition mistake 2024-06-05 16:00:24 +02:00
ManyTheFish
ff87b4db26 Avoid running proximity when only the exact attributes changes 2024-06-05 12:48:44 +02:00
ManyTheFish
ba9fadc8f1 Put only_additional_fields to None if the difference gives an empty result. 2024-06-05 10:51:16 +02:00
ManyTheFish
d29d4f88da Skip iterating over documents when the faceted field list doesn't change 2024-06-04 15:31:24 +02:00
ManyTheFish
17c5ceeb9d iterate over the faceted fields instead of over the whole document 2024-06-04 14:04:20 +02:00
Clément Renault
c32d746069 Rename the embeddings workloads 2024-05-30 16:46:57 +02:00
Clément Renault
b9a0ff0dd6 Cache a lot of operations to know if a field must be indexed 2024-05-30 16:18:23 +02:00
Clément Renault
75496af985 Add a span for the prepare_for_documents_reindexing 2024-05-30 12:14:22 +02:00
Clément Renault
0e9eb9eedb Add a span for the settings diff creation 2024-05-30 12:08:27 +02:00
Clément Renault
3a78e988da Reduce the number of complex calls to settings diff functions 2024-05-30 11:23:07 +02:00
Clément Renault
d9e5074189 Introduce a new way to determine the operations to perform on the fields 2024-05-30 11:23:07 +02:00
Clément Renault
bc210bdc00 Introduce a dedicated function to write proximity entries in database 2024-05-30 11:23:06 +02:00
Clément Renault
4bf83f701c Give the settings diff to the write_typed_chunk_into_index function 2024-05-30 11:23:06 +02:00
Clément Renault
db3887929f Fix an issue with settings diff and * in the searchable attributes 2024-05-30 11:22:50 +02:00
Clément Renault
9af103a88e Introducing a new into_del_add_obkv_conditional_operation function 2024-05-30 11:22:49 +02:00
Clément Renault
99211eb375 Introduce the SettingDiff only_additional_fields method 2024-05-30 11:22:49 +02:00
77 changed files with 2655 additions and 317 deletions


@@ -18,11 +18,9 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       - name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
         run: |


@@ -35,11 +35,9 @@ jobs:
           fetch-depth: 0 # fetch full history to be able to get main commit sha
           ref: ${{ steps.comment-branch.outputs.head_ref }}
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       - name: Run benchmarks on PR ${{ github.event.issue.id }}
         run: |


@@ -12,11 +12,9 @@ jobs:
     timeout-minutes: 180 # 3h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Run benchmarks
       - name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}


@@ -18,11 +18,9 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
      - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Set variables
       - name: Set current branch name


@@ -13,11 +13,9 @@ jobs:
     runs-on: benchmarks
     timeout-minutes: 4320 # 72h
     steps:
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       - name: Check for Command
         id: command


@@ -16,11 +16,9 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Set variables
       - name: Set current branch name


@@ -15,11 +15,9 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Set variables
       - name: Set current branch name


@@ -15,11 +15,9 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Set variables
       - name: Set current branch name


@@ -15,11 +15,9 @@ jobs:
     runs-on: benchmarks
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Set variables
       - name: Set current branch name


@@ -16,10 +16,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Install cargo-flaky
         run: cargo install cargo-flaky
       - name: Run cargo flaky in the dumps


@@ -12,11 +12,9 @@ jobs:
     timeout-minutes: 4320 # 72h
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       # Run benchmarks
       - name: Run the fuzzer


@@ -25,10 +25,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Install cargo-deb
         run: cargo install cargo-deb
       - uses: actions/checkout@v3


@@ -45,10 +45,7 @@ jobs:
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)
@@ -78,10 +75,7 @@
           asset_name: meilisearch-windows-amd64.exe
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Build
         run: cargo build --release --locked
       # No need to upload binaries for dry run (cron)
@@ -107,12 +101,10 @@
       - name: Checkout repository
         uses: actions/checkout@v3
       - name: Installing Rust toolchain
-        uses: actions-rs/toolchain@v1
+        uses: helix-editor/rust-toolchain@v1
         with:
-          toolchain: stable
           profile: minimal
           target: ${{ matrix.target }}
-          override: true
       - name: Cargo build
         uses: actions-rs/cargo@v1
         with:
@@ -154,12 +146,10 @@
           add-apt-repository "deb [arch=$(dpkg --print-architecture)] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
           apt-get update -y && apt-get install -y docker-ce
       - name: Installing Rust toolchain
-        uses: actions-rs/toolchain@v1
+        uses: helix-editor/rust-toolchain@v1
         with:
-          toolchain: stable
           profile: minimal
           target: ${{ matrix.target }}
-          override: true
       - name: Configure target aarch64 GNU
         ## Environment variable is not passed using env:
         ## LD gold won't work with MUSL


@@ -80,10 +80,11 @@ jobs:
             type=ref,event=tag
             type=raw,value=nightly,enable=${{ github.event_name != 'push' }}
             type=semver,pattern=v{{major}}.{{minor}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
+            type=semver,pattern=v{{major}},enable=${{ steps.check-tag-format.outputs.stable == 'true' }}
             type=raw,value=latest,enable=${{ steps.check-tag-format.outputs.stable == 'true' && steps.check-tag-format.outputs.latest == 'true' }}
       - name: Build and push
-        uses: docker/build-push-action@v5
+        uses: docker/build-push-action@v6
         with:
           push: true
           platforms: linux/amd64,linux/arm64


@@ -31,10 +31,7 @@ jobs:
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
       - name: Setup test with Rust stable
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+        uses: helix-editor/rust-toolchain@v1
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
       - name: Run cargo check without any default features
@@ -59,10 +56,7 @@
       - uses: actions/checkout@v3
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
         with:
@@ -87,10 +81,7 @@
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Run cargo build with almost all features
         run: |
           cargo build --workspace --locked --release --features "$(cargo xtask list-features --exclude-feature cuda)"
@@ -110,10 +101,7 @@
         run: |
           apt-get update
           apt-get install --assume-yes build-essential curl
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Run cargo tree without default features and check lindera is not present
         run: |
           if cargo tree -f '{p} {f}' -e normal --no-default-features | grep -qz lindera; then
@@ -137,10 +125,7 @@
         run: |
           apt-get update && apt-get install -y curl
           apt-get install build-essential -y
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
+      - uses: helix-editor/rust-toolchain@v1
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
       - name: Run tests in debug
@@ -154,11 +139,9 @@
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: 1.75.0
-          override: true
           components: clippy
       - name: Cache dependencies
         uses: Swatinem/rust-cache@v2.7.1
@@ -173,10 +156,10 @@
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: nightly
+          toolchain: nightly-2024-06-25
           override: true
           components: rustfmt
       - name: Cache dependencies


@@ -18,11 +18,9 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
-      - uses: actions-rs/toolchain@v1
+      - uses: helix-editor/rust-toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
-          override: true
       - name: Install sd
         run: cargo install sd
       - name: Update Cargo.toml file


@@ -109,6 +109,12 @@ They are JSON files with the following structure (comments are not actually supp
   "run_count": 3,
   // List of arguments to add to the Meilisearch command line.
   "extra_cli_args": ["--max-indexing-threads=1"],
+  // An expression that can be parsed as a comma-separated list of targets and levels
+  // as described in [tracing_subscriber's documentation](https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/targets/struct.Targets.html#examples).
+  // The expression is used to filter the spans that are measured for profiling purposes.
+  // Optional, defaults to "indexing::=trace" (for indexing workloads), common other values is
+  // "search::=trace"
+  "target": "indexing::=trace",
   // List of named assets that can be used in the commands.
   "assets": {
     // name of the asset.

Cargo.lock generated

@@ -381,9 +381,9 @@ dependencies = [
 [[package]]
 name = "arroy"
-version = "0.3.1"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "73897699bf04bac935c0b120990d2a511e91e563e0f9769f9c8bb983d98dfbc9"
+checksum = "2ece9e5347e7fdaaea3181dec7f916677ad5f3fcbac183648ce1924eb4aeef9a"
 dependencies = [
  "bytemuck",
  "byteorder",
@@ -679,9 +679,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
 [[package]]
 name = "bytemuck"
-version = "1.15.0"
+version = "1.16.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5d6d68c57235a3a081186990eca2867354726650f42f7516ca50c28d6281fd15"
+checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e"
 dependencies = [
  "bytemuck_derive",
 ]
@@ -2191,7 +2191,6 @@ dependencies = [
  "bytemuck",
  "byteorder",
  "rayon",
- "tempfile",
 ]

 [[package]]
@@ -2273,9 +2272,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
 [[package]]
 name = "heed"
-version = "0.20.1"
+version = "0.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f7acb9683d7c7068aa46d47557bfa4e35a277964b350d9504a87b03610163fd"
+checksum = "f60d7cff16094be9627830b399c087a25017e93fb3768b87cd656a68ccb1ebe8"
 dependencies = [
  "bitflags 2.5.0",
  "byteorder",
@@ -3172,9 +3171,9 @@ checksum = "f9d642685b028806386b2b6e75685faadd3eb65a85fff7df711ce18446a422da"
 [[package]]
 name = "lmdb-master-sys"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc9048db3a58c0732d7236abc4909058f9d2708cfb6d7d047eb895fddec6419a"
+checksum = "a5142795c220effa4c8f4813537bd4c88113a07e45e93100ccb2adc5cec6c7f3"
 dependencies = [
  "cc",
  "doxygen-rs",
@@ -5053,18 +5052,18 @@ dependencies = [
 [[package]]
 name = "thiserror"
-version = "1.0.58"
+version = "1.0.61"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297"
+checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709"
 dependencies = [
  "thiserror-impl",
 ]

 [[package]]
 name = "thiserror-impl"
-version = "1.0.58"
+version = "1.0.61"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7"
+checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -6080,12 +6079,13 @@ dependencies = [
 [[package]]
 name = "yaup"
-version = "0.2.1"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a59e7d27bed43f7c37c25df5192ea9d435a8092a902e02203359ac9ce3e429d9"
+checksum = "b0144f1a16a199846cb21024da74edd930b43443463292f536b7110b4855b5c6"
 dependencies = [
- "form_urlencoded",
  "serde",
  "url",
+ "thiserror",
 ]

 [[package]]


@@ -25,7 +25,7 @@
 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

-[Meilisearch](https://www.meilisearch.com) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.
+[Meilisearch](https://www.meilisearch.com?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=intro) helps you shape a delightful search experience in a snap, offering features that work out of the box to speed up your workflow.

 <p align="center" name="demo">
   <a href="https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-gif#gh-light-mode-only" target="_blank">
@@ -36,11 +36,18 @@
   </a>
 </p>

-🔥 [**Try it!**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demo-link) 🔥
+## 🖥 Examples
+
+- [**Movies**](https://where2watch.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=organization) — An application to help you find streaming platforms to watch movies using [hybrid search](https://www.meilisearch.com/solutions/hybrid-search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos).
+- [**Ecommerce**](https://ecommerce.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Ecommerce website using disjunctive [facets](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos), range and rating filtering, and pagination.
+- [**Songs**](https://music.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search through 47 million of songs.
+- [**SaaS**](https://saas.meilisearch.com/?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) — Search for contacts, deals, and companies in this [multi-tenant](https://www.meilisearch.com/docs/learn/security/multitenancy_tenant_tokens?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=demos) CRM application.
+
+See the list of all our example apps in our [demos repository](https://github.com/meilisearch/demos).

 ## ✨ Features
-- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search) & full-text search to get the most relevant results
-- **Search-as-you-type:** find & display results in less than 50 milliseconds to provide an intuitive experience
+- **Hybrid search:** Combine the best of both [semantic](https://www.meilisearch.com/docs/learn/experimental/vector_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) & full-text search to get the most relevant results
+- **Search-as-you-type:** Find & display results in less than 50 milliseconds to provide an intuitive experience
 - **[Typo tolerance](https://www.meilisearch.com/docs/learn/configuration/typo_tolerance?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** get relevant matches even when queries contain typos and misspellings
 - **[Filtering](https://www.meilisearch.com/docs/learn/fine_tuning_results/filtering?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features) and [faceted search](https://www.meilisearch.com/docs/learn/fine_tuning_results/faceted_search?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** enhance your users' search experience with custom filters and build a faceted search interface in a few lines of code
 - **[Sorting](https://www.meilisearch.com/docs/learn/fine_tuning_results/sorting?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=features):** sort results based on price, date, or pretty much anything else your users need
@@ -59,7 +66,7 @@ You can consult Meilisearch's documentation at [meilisearch.com/docs](https://ww

 ## 🚀 Getting started

-For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [Quick Start](https://www.meilisearch.com/docs/learn/getting_started/quick_start?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.
+For basic instructions on how to set up Meilisearch, add documents to an index, and search for documents, take a look at our [documentation](https://www.meilisearch.com/docs?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=get-started) guide.

 ## 🌍 Supercharge your Meilisearch experience
@@ -83,7 +90,7 @@ Finally, for more in-depth information, refer to our articles explaining fundame

 ## 📊 Telemetry

-Meilisearch collects **anonymized** data from users to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.
+Meilisearch collects **anonymized** user data to help us improve our product. You can [deactivate this](https://www.meilisearch.com/docs/learn/what_is_meilisearch/telemetry?utm_campaign=oss&utm_source=github&utm_medium=meilisearch&utm_content=telemetry#how-to-disable-data-collection) whenever you want.

 To request deletion of collected data, please write to us at [privacy@meilisearch.com](mailto:privacy@meilisearch.com). Remember to include your `Instance UID` in the message, as this helps us quickly find and delete your data.
@@ -105,11 +112,11 @@ Thank you for your support!

 ## 👩‍💻 Contributing

-Meilisearch is, and will always be, open-source! If you want to contribute to the project, please take a look at [our contribution guidelines](CONTRIBUTING.md).
+Meilisearch is, and will always be, open-source! If you want to contribute to the project, please look at [our contribution guidelines](CONTRIBUTING.md).

 ## 📦 Versioning

-Meilisearch releases and their associated binaries are available [in this GitHub page](https://github.com/meilisearch/meilisearch/releases).
+Meilisearch releases and their associated binaries are available on the project's [releases page](https://github.com/meilisearch/meilisearch/releases).

 The binaries are versioned following [SemVer conventions](https://semver.org/). To know more, read our [versioning policy](https://github.com/meilisearch/engine-team/blob/main/resources/versioning-policy.md).


@@ -40,7 +40,7 @@ ureq = "2.9.7"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }

 [dev-dependencies]
-arroy = "0.3.1"
+arroy = "0.4.0"
 big_s = "1.0.2"
 crossbeam = "0.8.4"
 insta = { version = "1.34.0", features = ["json", "redactions"] }


@@ -1811,7 +1811,7 @@ mod tests {
             task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
             index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
             enable_mdb_writemap: false,
-            index_growth_amount: 1000 * 1000, // 1 MB
+            index_growth_amount: 1000 * 1000 * 1000 * 1000, // 1 TB
             index_count: 5,
             indexer_config,
             autobatching_enabled: true,
@@ -5396,7 +5396,7 @@ mod tests {
         let reader = arroy::Reader::open(&rtxn, i as u16, index.vector_arroy)
             .map(Some)
             .or_else(|e| match e {
-                arroy::Error::MissingMetadata => Ok(None),
+                arroy::Error::MissingMetadata(_) => Ok(None),
                 e => Err(e),
             })
             .transpose();


@@ -188,6 +188,12 @@ impl AuthFilter {
         self.allow_index_creation && self.is_index_authorized(index)
     }

+    #[inline]
+    /// Return true if a tenant token was used to generate the search rules.
+    pub fn is_tenant_token(&self) -> bool {
+        self.search_rules.is_some()
+    }
+
     pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
         Self {
             search_rules: None,
@@ -205,6 +211,7 @@
             .unwrap_or(true)
     }

+    /// Check if the index is authorized by the API key and the tenant token.
     pub fn is_index_authorized(&self, index: &str) -> bool {
         self.key_authorized_indexes.is_index_authorized(index)
             && self
@@ -214,6 +221,44 @@
             .unwrap_or(true)
     }

+    /// Only check if the index is authorized by the API key
+    pub fn api_key_is_index_authorized(&self, index: &str) -> bool {
+        self.key_authorized_indexes.is_index_authorized(index)
+    }
+
+    /// Only check if the index is authorized by the tenant token
+    pub fn tenant_token_is_index_authorized(&self, index: &str) -> bool {
+        self.search_rules
+            .as_ref()
+            .map(|search_rules| search_rules.is_index_authorized(index))
+            .unwrap_or(true)
+    }
+
+    /// Return the list of authorized indexes by the tenant token if any
+    pub fn tenant_token_list_index_authorized(&self) -> Vec<String> {
+        match self.search_rules {
+            Some(ref search_rules) => {
+                let mut indexes: Vec<_> = match search_rules {
+                    SearchRules::Set(set) => set.iter().map(|s| s.to_string()).collect(),
+                    SearchRules::Map(map) => map.keys().map(|s| s.to_string()).collect(),
+                };
+                indexes.sort_unstable();
+                indexes
+            }
+            None => Vec::new(),
+        }
+    }
+
+    /// Return the list of authorized indexes by the api key if any
+    pub fn api_key_list_index_authorized(&self) -> Vec<String> {
+        let mut indexes: Vec<_> = match self.key_authorized_indexes {
+            SearchRules::Set(ref set) => set.iter().map(|s| s.to_string()).collect(),
+            SearchRules::Map(ref map) => map.keys().map(|s| s.to_string()).collect(),
+        };
+        indexes.sort_unstable();
+        indexes
+    }
+
     pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
         if !self.is_index_authorized(index) {
             return None;


@@ -54,6 +54,8 @@ chinese-pinyin = ["milli/chinese-pinyin"]
 hebrew = ["milli/hebrew"]
 # japanese specialized tokenization
 japanese = ["milli/japanese"]
+# korean specialized tokenization
+korean = ["milli/korean"]
 # thai specialized tokenization
 thai = ["milli/thai"]
 # allow greek specialized tokenization


@@ -398,7 +398,8 @@ impl ErrorCode for milli::Error {
                 UserError::CriterionError(_) => Code::InvalidSettingsRankingRules,
                 UserError::InvalidGeoField { .. } => Code::InvalidDocumentGeoField,
                 UserError::InvalidVectorDimensions { .. } => Code::InvalidVectorDimensions,
-                UserError::InvalidVectorsMapType { .. } => Code::InvalidVectorsType,
+                UserError::InvalidVectorsMapType { .. }
+                | UserError::InvalidVectorsEmbedderConf { .. } => Code::InvalidVectorsType,
                 UserError::TooManyVectors(_, _) => Code::TooManyVectors,
                 UserError::SortError(_) => Code::InvalidSearchSort,
                 UserError::InvalidMinTypoWordLenSetting(_, _) => {


@@ -98,7 +98,6 @@ tokio-stream = "0.1.14"
 toml = "0.8.8"
 uuid = { version = "1.6.1", features = ["serde", "v4"] }
 walkdir = "2.4.0"
-yaup = "0.2.1"
 serde_urlencoded = "0.7.1"
 termcolor = "1.4.1"
 url = { version = "2.5.0", features = ["serde"] }
@@ -118,7 +117,7 @@ maplit = "1.0.2"
 meili-snap = { path = "../meili-snap" }
 temp-env = "0.3.6"
 urlencoding = "2.1.3"
-yaup = "0.2.1"
+yaup = "0.3.1"

 [build-dependencies]
 anyhow = { version = "1.0.79", optional = true }
@@ -151,6 +150,7 @@ chinese = ["meilisearch-types/chinese"]
 chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
 hebrew = ["meilisearch-types/hebrew"]
 japanese = ["meilisearch-types/japanese"]
+korean = ["meilisearch-types/korean"]
 thai = ["meilisearch-types/thai"]
 greek = ["meilisearch-types/greek"]
 khmer = ["meilisearch-types/khmer"]
@@ -158,5 +158,5 @@ vietnamese = ["meilisearch-types/vietnamese"]
 swedish-recomposition = ["meilisearch-types/swedish-recomposition"]

 [package.metadata.mini-dashboard]
-assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.13/build.zip"
-sha1 = "e20cc9b390003c6c844f4b8bcc5c5013191a77ff"
+assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"
+sha1 = "592d1b5a3459d621d0aae1dded8fe3154f5c38fe"


@@ -98,14 +98,29 @@ impl From<MeilisearchHttpError> for aweb::Error {
 impl From<aweb::error::PayloadError> for MeilisearchHttpError {
     fn from(error: aweb::error::PayloadError) -> Self {
-        MeilisearchHttpError::Payload(PayloadError::Payload(error))
+        match error {
+            aweb::error::PayloadError::Incomplete(_) => MeilisearchHttpError::Payload(
+                PayloadError::Payload(ActixPayloadError::IncompleteError),
+            ),
+            _ => MeilisearchHttpError::Payload(PayloadError::Payload(
+                ActixPayloadError::OtherError(error),
+            )),
+        }
     }
 }

+#[derive(Debug, thiserror::Error)]
+pub enum ActixPayloadError {
+    #[error("The provided payload is incomplete and cannot be parsed")]
+    IncompleteError,
+    #[error(transparent)]
+    OtherError(aweb::error::PayloadError),
+}
+
 #[derive(Debug, thiserror::Error)]
 pub enum PayloadError {
     #[error(transparent)]
-    Payload(aweb::error::PayloadError),
+    Payload(ActixPayloadError),
     #[error(transparent)]
     Json(JsonPayloadError),
     #[error(transparent)]
@@ -122,13 +137,15 @@ impl ErrorCode for PayloadError {
     fn error_code(&self) -> Code {
         match self {
             PayloadError::Payload(e) => match e {
-                aweb::error::PayloadError::Incomplete(_) => Code::Internal,
-                aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
-                aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
-                aweb::error::PayloadError::UnknownLength => Code::Internal,
-                aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
-                aweb::error::PayloadError::Io(_) => Code::Internal,
-                _ => todo!(),
+                ActixPayloadError::IncompleteError => Code::BadRequest,
+                ActixPayloadError::OtherError(error) => match error {
+                    aweb::error::PayloadError::EncodingCorrupted => Code::Internal,
+                    aweb::error::PayloadError::Overflow => Code::PayloadTooLarge,
+                    aweb::error::PayloadError::UnknownLength => Code::Internal,
+                    aweb::error::PayloadError::Http2Payload(_) => Code::Internal,
+                    aweb::error::PayloadError::Io(_) => Code::Internal,
+                    _ => todo!(),
+                },
             },
             PayloadError::Json(err) => match err {
                 JsonPayloadError::Overflow { .. } => Code::PayloadTooLarge,


@@ -12,6 +12,8 @@ use futures::Future;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::error::{Code, ResponseError};
use self::policies::AuthError;
pub struct GuardedData<P, D> {
data: D,
filters: AuthFilter,
@@ -35,12 +37,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, token, index).await? {
Some(filters) => match data {
Ok(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::InvalidToken.into()),
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(e) => Err(ResponseError::from_msg(e.to_string(), Code::InvalidApiKey)),
}
}
@@ -51,12 +53,12 @@ impl<P, D> GuardedData<P, D> {
let missing_master_key = auth.get_master_key().is_none();
match Self::authenticate(auth, String::new(), None).await? {
Some(filters) => match data {
Ok(filters) => match data {
Some(data) => Ok(Self { data, filters, _marker: PhantomData }),
None => Err(AuthenticationError::IrretrievableState.into()),
},
None if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
None => Err(AuthenticationError::MissingAuthorizationHeader.into()),
Err(_) if missing_master_key => Err(AuthenticationError::MissingMasterKey.into()),
Err(_) => Err(AuthenticationError::MissingAuthorizationHeader.into()),
}
}
@@ -64,7 +66,7 @@ impl<P, D> GuardedData<P, D> {
auth: Data<AuthController>,
token: String,
index: Option<String>,
) -> Result<Option<AuthFilter>, ResponseError>
) -> Result<Result<AuthFilter, AuthError>, ResponseError>
where
P: Policy + 'static,
{
@@ -127,13 +129,14 @@ pub trait Policy {
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter>;
) -> Result<AuthFilter, policies::AuthError>;
}
pub mod policies {
use actix_web::web::Data;
use jsonwebtoken::{decode, Algorithm, DecodingKey, Validation};
use meilisearch_auth::{AuthController, AuthFilter, SearchRules};
use meilisearch_types::error::{Code, ErrorCode};
// reexport actions in policies in order to be used in routes configuration.
pub use meilisearch_types::keys::{actions, Action};
use serde::{Deserialize, Serialize};
@@ -144,11 +147,53 @@ pub mod policies {
enum TenantTokenOutcome {
NotATenantToken,
Invalid,
Expired,
Valid(Uuid, SearchRules),
}
#[derive(thiserror::Error, Debug)]
pub enum AuthError {
#[error("Tenant token expired. Was valid up to `{exp}` and we're now `{now}`.")]
ExpiredTenantToken { exp: i64, now: i64 },
#[error("The provided API key is invalid.")]
InvalidApiKey,
#[error("The provided tenant token cannot acces the index `{index}`, allowed indexes are {allowed:?}.")]
TenantTokenAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error(
"The API key used to generate this tenant token cannot acces the index `{index}`."
)]
TenantTokenApiKeyAccessingnUnauthorizedIndex { index: String },
#[error(
"The API key cannot acces the index `{index}`, authorized indexes are {allowed:?}."
)]
ApiKeyAccessingnUnauthorizedIndex { index: String, allowed: Vec<String> },
#[error("The provided tenant token is invalid.")]
InvalidTenantToken,
#[error("Could not decode tenant token, {0}.")]
CouldNotDecodeTenantToken(jsonwebtoken::errors::Error),
#[error("Invalid action `{0}`.")]
InternalInvalidAction(u8),
}
impl From<jsonwebtoken::errors::Error> for AuthError {
fn from(error: jsonwebtoken::errors::Error) -> Self {
use jsonwebtoken::errors::ErrorKind;
match error.kind() {
ErrorKind::InvalidToken => AuthError::InvalidTenantToken,
_ => AuthError::CouldNotDecodeTenantToken(error),
}
}
}
impl ErrorCode for AuthError {
fn error_code(&self) -> Code {
match self {
AuthError::InternalInvalidAction(_) => Code::Internal,
_ => Code::InvalidApiKey,
}
}
}
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
@@ -158,15 +203,15 @@ pub mod policies {
}
/// Extracts the key id used to sign the payload, without performing any validation.
fn extract_key_id(token: &str) -> Option<Uuid> {
fn extract_key_id(token: &str) -> Result<Uuid, AuthError> {
let mut validation = tenant_token_validation();
validation.insecure_disable_signature_validation();
let dummy_key = DecodingKey::from_secret(b"secret");
let token_data = decode::<Claims>(token, &dummy_key, &validation).ok()?;
let token_data = decode::<Claims>(token, &dummy_key, &validation)?;
// get token fields without validating it.
let Claims { api_key_uid, .. } = token_data.claims;
Some(api_key_uid)
Ok(api_key_uid)
}
fn is_keys_action(action: u8) -> bool {
@@ -187,76 +232,102 @@ pub mod policies {
auth: Data<AuthController>,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
) -> Result<AuthFilter, AuthError> {
// authenticate if token is the master key.
// Without a master key, all routes are accessible except the key-related routes.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Some(AuthFilter::default());
return Ok(AuthFilter::default());
}
let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
Ok(TenantTokenOutcome::Valid(key_uuid, search_rules)) => {
(key_uuid, Some(search_rules))
}
TenantTokenOutcome::Expired => return None,
TenantTokenOutcome::Invalid => return None,
TenantTokenOutcome::NotATenantToken => {
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
}
Ok(TenantTokenOutcome::NotATenantToken)
| Err(AuthError::InvalidTenantToken) => (
auth.get_optional_uid_from_encoded_key(token.as_bytes())
.map_err(|_e| AuthError::InvalidApiKey)?
.ok_or(AuthError::InvalidApiKey)?,
None,
),
Err(e) => return Err(e),
};
// check that the indexes are allowed
let action = Action::from_repr(A)?;
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
{
return Some(auth_filter);
let action = Action::from_repr(A).ok_or(AuthError::InternalInvalidAction(A))?;
let auth_filter = auth
.get_key_filters(key_uuid, search_rules)
.map_err(|_e| AuthError::InvalidApiKey)?;
// First check if the index is authorized in the tenant token. This is public
// information, so we can return a nice error message.
if let Some(index) = index {
if !auth_filter.tenant_token_is_index_authorized(index) {
return Err(AuthError::TenantTokenAccessingUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.tenant_token_list_index_authorized(),
});
}
if !auth_filter.api_key_is_index_authorized(index) {
if auth_filter.is_tenant_token() {
// If the error comes from a tenant token we cannot share the list
// of authorized indexes in the API key. This is not public information.
return Err(AuthError::TenantTokenApiKeyAccessingUnauthorizedIndex {
index: index.to_string(),
});
} else {
// Otherwise we can share the list
// of authorized indexes in the API key.
return Err(AuthError::ApiKeyAccessingUnauthorizedIndex {
index: index.to_string(),
allowed: auth_filter.api_key_list_index_authorized(),
});
}
}
}
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false) {
return Ok(auth_filter);
}
None
Err(AuthError::InvalidApiKey)
}
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
) -> Result<TenantTokenOutcome, AuthError> {
// Only the search action can be performed with a tenant token.
if A != actions::SEARCH {
return TenantTokenOutcome::NotATenantToken;
return Ok(TenantTokenOutcome::NotATenantToken);
}
let uid = if let Some(uid) = extract_key_id(token) {
uid
} else {
return TenantTokenOutcome::NotATenantToken;
};
let uid = extract_key_id(token)?;
// Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) {
key
} else {
return TenantTokenOutcome::Invalid;
return Err(AuthError::InvalidTenantToken);
};
let data = if let Ok(data) = decode::<Claims>(
let data = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
) {
data
} else {
return TenantTokenOutcome::Invalid;
};
)?;
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return TenantTokenOutcome::Expired;
let now = OffsetDateTime::now_utc().unix_timestamp();
if now > exp {
return Err(AuthError::ExpiredTenantToken { exp, now });
}
}
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
Ok(TenantTokenOutcome::Valid(uid, data.claims.search_rules))
}
}
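For reference, the counterpart of this validation is token generation. Below is a minimal sketch (not part of this PR) of a tenant token this policy accepts, assuming the claim names the tests further down exercise (`searchRules`, `apiKeyUid`) and signing with the API key itself, as `authenticate_tenant_token` expects:
// Hedged sketch: the claim shape mirrors the tests below.
use jsonwebtoken::{encode, EncodingKey, Header};
fn make_tenant_token(api_key: &str, api_key_uid: &str) -> jsonwebtoken::errors::Result<String> {
    let claims = serde_json::json!({
        "searchRules": ["products"], // indexes the token is allowed to search
        "apiKeyUid": api_key_uid,    // read by `extract_key_id` before any validation
    });
    // The signature must verify against the key regenerated from `apiKeyUid`.
    encode(&Header::default(), &claims, &EncodingKey::from_secret(api_key.as_bytes()))
}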

View File

@@ -752,10 +752,15 @@ fn prepare_search<'t>(
SearchKind::SemanticOnly { embedder_name, embedder } => {
let vector = match query.vector.clone() {
Some(vector) => vector,
None => embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?,
None => {
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
embedder
.embed_one(query.q.clone().unwrap())
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}
};
search.semantic(embedder_name.clone(), embedder.clone(), Some(vector));
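Giving the embedding call its own span with the `search::vector` target makes it selectable on its own when profiling searches. A hedged sketch of a subscriber that surfaces only this span, assuming `tracing-subscriber` with its `env-filter` feature:
// Hedged sketch: enable only the `search::vector` target at trace level.
use tracing_subscriber::{fmt, EnvFilter};
fn init_embed_profiling() {
    // `target=level` directives match the `target:` given to `trace_span!`.
    fmt().with_env_filter(EnvFilter::new("search::vector=trace")).init();
}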
@@ -1331,13 +1336,23 @@ fn insert_geo_distance(sorts: &[String], document: &mut Document) {
// TODO: TAMO: milli encountered an internal error, what do we want to do?
let base = [capture_group[1].parse().unwrap(), capture_group[2].parse().unwrap()];
let geo_point = &document.get("_geo").unwrap_or(&json!(null));
if let Some((lat, lng)) = geo_point["lat"].as_f64().zip(geo_point["lng"].as_f64()) {
if let Some((lat, lng)) =
extract_geo_value(&geo_point["lat"]).zip(extract_geo_value(&geo_point["lng"]))
{
let distance = milli::distance_between_two_points(&base, &[lat, lng]);
document.insert("_geoDistance".to_string(), json!(distance.round() as usize));
}
}
}
fn extract_geo_value(value: &Value) -> Option<f64> {
match value {
Value::Number(n) => n.as_f64(),
Value::String(s) => s.parse().ok(),
_ => None,
}
}
fn compute_formatted_options(
attr_to_highlight: &HashSet<String>,
attr_to_crop: &[String],
@@ -1711,4 +1726,54 @@ mod test {
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), None);
}
#[test]
fn test_insert_geo_distance_with_coords_as_string() {
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": 3
}
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": "50",
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
let value: Document = serde_json::from_str(
r#"{
"_geo": {
"lat": 50,
"lng": "3"
},
"id": "1"
}"#,
)
.unwrap();
let sorters = &["_geoPoint(50,3):desc".to_string()];
let mut document = value.clone();
insert_geo_distance(sorters, &mut document);
assert_eq!(document.get("_geoDistance"), Some(&json!(0)));
}
}

View File

@@ -78,7 +78,7 @@ pub static ALL_ACTIONS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": "The provided API key is invalid.",
json!({"message": null,
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -119,7 +119,8 @@ async fn error_access_expired_key() {
thread::sleep(time::Duration::new(1, 0));
for (method, route) in AUTHORIZATIONS.keys() {
let (response, code) = server.dummy_request(method, route).await;
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -149,7 +150,8 @@ async fn error_access_unauthorized_index() {
// filter `products` index routes
.filter(|(_, route)| route.starts_with("/indexes/products"))
{
let (response, code) = server.dummy_request(method, route).await;
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -176,7 +178,8 @@ async fn error_access_unauthorized_action() {
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
let (response, code) = server.dummy_request(method, route).await;
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone(), "on route: {:?} - {:?}", method, route);
assert_eq!(403, code, "{:?}", &response);
@@ -280,7 +283,7 @@ async fn access_authorized_no_index_restriction() {
route,
action
);
assert_ne!(code, 403);
assert_ne!(code, 403, "on route: {:?} - {:?} with action: {:?}", method, route, action);
}
}
}

View File

@@ -1,7 +1,10 @@
use actix_web::test;
use http::StatusCode;
use jsonwebtoken::{EncodingKey, Header};
use meili_snap::*;
use uuid::Uuid;
use crate::common::Server;
use crate::common::{Server, Value};
use crate::json;
#[actix_rt::test]
@@ -436,3 +439,262 @@ async fn patch_api_keys_unknown_field() {
}
"###);
}
async fn send_request_with_custom_auth(
app: impl actix_web::dev::Service<
actix_http::Request,
Response = actix_web::dev::ServiceResponse<impl actix_web::body::MessageBody>,
Error = actix_web::Error,
>,
url: &str,
auth: &str,
) -> (Value, StatusCode) {
let req = test::TestRequest::get().uri(url).insert_header(("Authorization", auth)).to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
(response, status_code)
}
#[actix_rt::test]
async fn invalid_auth_format() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let req = test::TestRequest::get().uri("/indexes/dog/documents").to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"401 Unauthorized");
snapshot!(response, @r###"
{
"message": "The Authorization header is missing. It must use the bearer authorization method.",
"code": "missing_authorization_header",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
}
"###);
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", "Bearer").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_api_key() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", "Bearer kefir").await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {key}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key cannot acces the index `doggo`, authorized indexes are [\"dog\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}
#[actix_rt::test]
async fn invalid_tenant_token() {
let server = Server::new_auth().await;
let app = server.init_web_app().await;
// The tenant token won't be recognized at all if we're not on a search route
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/documents", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided API key is invalid.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let claims = json!({ "tamo": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: missing field `searchRules` at line 1 column 16.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// The error messages are not ideal but that's expected since we cannot _yet_ use deserr
let claims = json!({ "searchRules": "kefir" });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, JSON error: data did not match any variant of untagged enum SearchRules at line 1 column 23.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
let uuid = Uuid::nil();
let claims = json!({ "searchRules": ["kefir"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(&Header::default(), &claims, &EncodingKey::from_secret(b"tamo"))
.unwrap();
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "Could not decode tenant token, InvalidSignature.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// ~~ For the next tests we first need a valid API key
let key = json!({ "actions": ["search"], "indexes": ["dog"], "expiresAt": null, "uid": uuid.to_string() });
let req = test::TestRequest::post()
.uri("/keys")
.insert_header(("Authorization", "Bearer MASTER_KEY"))
.set_json(&key)
.to_request();
let res = test::call_service(&app, req).await;
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(json_string!(response, { ".createdAt" => "[date]", ".updatedAt" => "[date]" }), @r###"
{
"name": null,
"description": null,
"key": "aeb94973e0b6e912d94165430bbe87dee91a7c4f891ce19050c3910ec96977e9",
"uid": "00000000-0000-0000-0000-000000000000",
"actions": [
"search"
],
"indexes": [
"dog"
],
"expiresAt": null,
"createdAt": "[date]",
"updatedAt": "[date]"
}
"###);
let key = response["key"].as_str().unwrap();
let claims = json!({ "searchRules": ["doggo", "catto"], "apiKeyUid": uuid.to_string() });
let jwt = jsonwebtoken::encode(
&Header::default(),
&claims,
&EncodingKey::from_secret(key.as_bytes()),
)
.unwrap();
// Try to access an index that is not authorized by the tenant token
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/dog/search", &format!("Bearer {jwt}")).await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The provided tenant token cannot acces the index `dog`, allowed indexes are [\"catto\", \"doggo\"].",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
// Try to access an index that *is* authorized by the tenant token but not by the API key used to generate the tenant token
let (response, status_code) =
send_request_with_custom_auth(&app, "/indexes/doggo/search", &format!("Bearer {jwt}"))
.await;
snapshot!(status_code, @"403 Forbidden");
snapshot!(response, @r###"
{
"message": "The API key used to generate this tenant token cannot acces the index `doggo`.",
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
}
"###);
}

View File

@@ -53,7 +53,8 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
});
static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
json!({"message": "The provided API key is invalid.",
json!({
"message": null,
"code": "invalid_api_key",
"type": "auth",
"link": "https://docs.meilisearch.com/errors#invalid_api_key"
@@ -191,7 +192,9 @@ macro_rules! compute_forbidden_search {
server.use_api_key(&web_token);
let index = server.index("sales");
index
.search(json!({}), |response, code| {
.search(json!({}), |mut response, code| {
// We don't assert anything on the message since it may change between cases
response["message"] = serde_json::json!(null);
assert_eq!(
response,
INVALID_RESPONSE.clone(),
@@ -495,7 +498,8 @@ async fn error_access_forbidden_routes() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
if !actions.contains("search") {
let (response, code) = server.dummy_request(method, route).await;
let (mut response, code) = server.dummy_request(method, route).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -529,14 +533,16 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
assert_ne!(response, INVALID_RESPONSE.clone());
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}
@@ -585,7 +591,8 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (response, code) = server.dummy_request("POST", "/indexes/products/search").await;
let (mut response, code) = server.dummy_request("POST", "/indexes/products/search").await;
response["message"] = serde_json::json!(null);
assert_eq!(response, INVALID_RESPONSE.clone());
assert_eq!(code, 403);
}

View File

@@ -109,9 +109,11 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
fn invalid_response(query_index: Option<usize>) -> Value {
let message = if let Some(query_index) = query_index {
format!("Inside `.queries[{query_index}]`: The provided API key is invalid.")
json!(format!("Inside `.queries[{query_index}]`: The provided API key is invalid."))
} else {
"The provided API key is invalid.".to_string()
// if it's anything else we simply return null and test all the
// error messages somewhere else
json!(null)
};
json!({"message": message,
"code": "invalid_api_key",
@@ -414,7 +416,10 @@ macro_rules! compute_forbidden_single_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
let (mut response, code) = server.multi_search(json!({"queries" : [{"indexUid": "sales"}]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -469,10 +474,13 @@ macro_rules! compute_forbidden_multiple_search {
for (tenant_token, failed_query_index) in $tenant_tokens.iter().zip(failed_query_indexes.into_iter()) {
let web_token = generate_tenant_token(&uid, &key, tenant_token.clone());
server.use_api_key(&web_token);
let (response, code) = server.multi_search(json!({"queries" : [
let (mut response, code) = server.multi_search(json!({"queries" : [
{"indexUid": "sales"},
{"indexUid": "products"},
]})).await;
if failed_query_index.is_none() && !response["message"].is_null() {
response["message"] = serde_json::json!(null);
}
assert_eq!(
response,
invalid_response(failed_query_index),
@@ -1073,18 +1081,20 @@ async fn error_access_expired_parent_key() {
server.use_api_key(&web_token);
// test search request while parent_key is not expired
let (response, code) = server
let (mut response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_ne!(response, invalid_response(None));
assert_ne!(code, 403);
// wait until the key is expired.
thread::sleep(time::Duration::new(1, 0));
let (response, code) = server
let (mut response, code) = server
.multi_search(json!({"queries" : [{"indexUid": "sales"}, {"indexUid": "products"}]}))
.await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}
@@ -1134,8 +1144,9 @@ async fn error_access_modified_token() {
.join(".");
server.use_api_key(&altered_token);
let (response, code) =
let (mut response, code) =
server.multi_search(json!({"queries" : [{"indexUid": "products"}]})).await;
response["message"] = serde_json::json!(null);
assert_eq!(response, invalid_response(None));
assert_eq!(code, 403);
}

View File

@@ -185,7 +185,7 @@ impl Index<'_> {
pub async fn get_document(&self, id: u64, options: Option<Value>) -> (Value, StatusCode) {
let mut url = format!("/indexes/{}/documents/{}", urlencode(self.uid.as_ref()), id);
if let Some(options) = options {
write!(url, "?{}", yaup::to_string(&options).unwrap()).unwrap();
write!(url, "{}", yaup::to_string(&options).unwrap()).unwrap();
}
self.service.get(url).await
}
@@ -202,7 +202,7 @@ impl Index<'_> {
pub async fn get_all_documents(&self, options: GetAllDocumentsOptions) -> (Value, StatusCode) {
let url = format!(
"/indexes/{}/documents?{}",
"/indexes/{}/documents{}",
urlencode(self.uid.as_ref()),
yaup::to_string(&options).unwrap()
);
@@ -365,7 +365,7 @@ impl Index<'_> {
}
pub async fn search_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/search?{}", urlencode(self.uid.as_ref()), query);
let url = format!("/indexes/{}/search{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -402,7 +402,7 @@ impl Index<'_> {
}
pub async fn similar_get(&self, query: &str) -> (Value, StatusCode) {
let url = format!("/indexes/{}/similar?{}", urlencode(self.uid.as_ref()), query);
let url = format!("/indexes/{}/similar{}", urlencode(self.uid.as_ref()), query);
self.service.get(url).await
}
@@ -427,8 +427,11 @@ impl Index<'_> {
#[derive(Debug, Default, serde::Serialize)]
#[serde(rename_all = "camelCase")]
pub struct GetAllDocumentsOptions {
#[serde(skip_serializing_if = "Option::is_none")]
pub limit: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub offset: Option<usize>,
pub retrieve_vectors: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub fields: Option<Vec<&'static str>>,
pub retrieve_vectors: bool,
}
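These `skip_serializing_if` attributes pair with the dropped `?` in the URL format strings above: the query-string serializer is now expected to return an empty string when every field is `None` and to prefix its own `?` otherwise. A hedged sketch of that assumed contract:
// Hedged sketch of the assumed `yaup` behavior after the upgrade.
#[derive(Default, serde::Serialize)]
#[serde(rename_all = "camelCase")]
struct Opts {
    #[serde(skip_serializing_if = "Option::is_none")]
    limit: Option<usize>,
}
fn demo() {
    assert_eq!(yaup::to_string(&Opts::default()).unwrap(), ""); // nothing to encode
    assert_eq!(yaup::to_string(&Opts { limit: Some(1) }).unwrap(), "?limit=1");
}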

View File

@@ -42,6 +42,12 @@ impl std::ops::Deref for Value {
}
}
impl std::ops::DerefMut for Value {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
impl PartialEq<serde_json::Value> for Value {
fn eq(&self, other: &serde_json::Value) -> bool {
&self.0 == other
@@ -65,7 +71,7 @@ impl Display for Value {
write!(
f,
"{}",
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" })
json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" })
)
}
}
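This `DerefMut` impl is what lets the tests above write `response["message"] = serde_json::json!(null)` through the wrapper: index expressions auto-deref, so `IndexMut` on `serde_json::Value` becomes reachable. A minimal sketch, assuming a newtype over `serde_json::Value`:
// Minimal sketch: mutable indexing reaches serde_json::Value through DerefMut.
use std::ops::{Deref, DerefMut};
struct Value(serde_json::Value);
impl Deref for Value {
    type Target = serde_json::Value;
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}
impl DerefMut for Value {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
fn demo() {
    let mut response = Value(serde_json::json!({ "message": "some error" }));
    response["message"] = serde_json::json!(null); // compiles thanks to DerefMut
    assert!(response["message"].is_null());
}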

View File

@@ -183,6 +183,58 @@ async fn add_single_document_gzip_encoded() {
}
"###);
}
#[actix_rt::test]
async fn add_single_document_gzip_encoded_with_incomplete_error() {
let document = json!("kefir");
// this is what is expected and should work
let server = Server::new().await;
let app = server.init_web_app().await;
// post
let document = serde_json::to_string(&document).unwrap();
let req = test::TestRequest::post()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
// put
let req = test::TestRequest::put()
.uri("/indexes/dog/documents")
.set_payload(document.to_string())
.insert_header(("content-type", "application/json"))
.insert_header(("content-encoding", "gzip"))
.to_request();
let res = test::call_service(&app, req).await;
let status_code = res.status();
let body = test::read_body(res).await;
let response: Value = serde_json::from_slice(&body).unwrap_or_default();
snapshot!(status_code, @"400 Bad Request");
snapshot!(json_string!(response),
@r###"
{
"message": "The provided payload is incomplete and cannot be parsed",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
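The payload above is plain JSON sent with a `content-encoding: gzip` header, so the server sees a stream that ends before the gzip trailer. For contrast, a hedged sketch (not part of this PR) of a body the route would decompress successfully, assuming the `flate2` crate:
// A complete gzip stream for the same JSON document.
use std::io::Write;
use flate2::write::GzEncoder;
use flate2::Compression;
fn gzip_body(json: &str) -> Vec<u8> {
    let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
    encoder.write_all(json.as_bytes()).unwrap();
    encoder.finish().unwrap() // finish() flushes and appends the gzip trailer
}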
/// Here we try document requests with every encoding
#[actix_rt::test]
@@ -1040,6 +1092,52 @@ async fn document_addition_with_primary_key() {
"###);
}
#[actix_rt::test]
async fn document_addition_with_huge_int_primary_key() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"primary": 14630868576586246730u64,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents, Some("primary")).await;
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(response,
@r###"
{
"uid": 0,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (response, code) = index.get_document(14630868576586246730u64, None).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response),
@r###"
{
"primary": 14630868576586246730,
"content": "foo"
}
"###);
}
#[actix_rt::test]
async fn replace_document() {
let server = Server::new().await;

View File

@@ -719,7 +719,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!(null)).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value type: expected an object, but found null",
"code": "bad_request",
@@ -730,7 +730,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_offset",
@@ -741,7 +741,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
"code": "invalid_document_limit",
@@ -752,7 +752,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
"code": "invalid_document_fields",
@@ -763,7 +763,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
"code": "invalid_document_filter",
@@ -774,7 +774,7 @@ async fn fetch_document_by_filter() {
let (response, code) = index.get_document_by_filter(json!({ "filter": "cool doggo" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
@@ -786,7 +786,7 @@ async fn fetch_document_by_filter() {
let (response, code) =
index.get_document_by_filter(json!({ "filter": "doggo = bernese" })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
@@ -803,7 +803,7 @@ async fn retrieve_vectors() {
// GET ALL DOCUMENTS BY QUERY
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=tamo").await;
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
@@ -812,7 +812,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_all_documents_raw("?retrieveVectors=true").await;
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
@@ -824,7 +824,7 @@ async fn retrieve_vectors() {
// FETCH ALL DOCUMENTS BY POST
let (response, _code) =
index.get_document_by_filter(json!({ "retrieveVectors": "tamo" })).await;
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value type at `.retrieveVectors`: expected a boolean, but found a string: `\"tamo\"`",
"code": "invalid_document_retrieve_vectors",
@@ -833,7 +833,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_document_by_filter(json!({ "retrieveVectors": true })).await;
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",
@@ -844,7 +844,7 @@ async fn retrieve_vectors() {
// GET A SINGLE DOCUMENT
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": "tamo"}))).await;
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `retrieveVectors`: could not parse `tamo` as a boolean, expected either `true` or `false`",
"code": "invalid_document_retrieve_vectors",
@@ -853,7 +853,7 @@ async fn retrieve_vectors() {
}
"###);
let (response, _code) = index.get_document(0, Some(json!({"retrieveVectors": true}))).await;
snapshot!(json_string!(response), @r###"
snapshot!(response, @r###"
{
"message": "Passing `retrieveVectors` as a parameter requires enabling the `vector store` experimental feature. See https://github.com/meilisearch/product/discussions/677",
"code": "feature_not_enabled",

View File

@@ -71,7 +71,7 @@ async fn search_bad_offset() {
}
"###);
let (response, code) = index.search_get("offset=doggo").await;
let (response, code) = index.search_get("?offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -99,7 +99,7 @@ async fn search_bad_limit() {
}
"###);
let (response, code) = index.search_get("limit=doggo").await;
let (response, code) = index.search_get("?limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -127,7 +127,7 @@ async fn search_bad_page() {
}
"###);
let (response, code) = index.search_get("page=doggo").await;
let (response, code) = index.search_get("?page=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -155,7 +155,7 @@ async fn search_bad_hits_per_page() {
}
"###);
let (response, code) = index.search_get("hitsPerPage=doggo").await;
let (response, code) = index.search_get("?hitsPerPage=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -212,7 +212,7 @@ async fn search_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.search_get("retrieveVectors=").await;
let (response, code) = index.search_get("?retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -223,7 +223,7 @@ async fn search_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.search_get("retrieveVectors=doggo").await;
let (response, code) = index.search_get("?retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -269,7 +269,7 @@ async fn search_bad_crop_length() {
}
"###);
let (response, code) = index.search_get("cropLength=doggo").await;
let (response, code) = index.search_get("?cropLength=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -359,7 +359,7 @@ async fn search_bad_show_matches_position() {
}
"###);
let (response, code) = index.search_get("showMatchesPosition=doggo").await;
let (response, code) = index.search_get("?showMatchesPosition=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -442,7 +442,7 @@ async fn search_non_filterable_facets() {
}
"###);
let (response, code) = index.search_get("facets=doggo").await;
let (response, code) = index.search_get("?facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -472,7 +472,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
}
"###);
let (response, code) = index.search_get("facets=doggo").await;
let (response, code) = index.search_get("?facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -502,7 +502,7 @@ async fn search_non_filterable_facets_no_filterable() {
}
"###);
let (response, code) = index.search_get("facets=doggo").await;
let (response, code) = index.search_get("?facets=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -532,7 +532,7 @@ async fn search_non_filterable_facets_multiple_facets() {
}
"###);
let (response, code) = index.search_get("facets=doggo,neko").await;
let (response, code) = index.search_get("?facets=doggo,neko").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -625,7 +625,7 @@ async fn search_bad_matching_strategy() {
}
"###);
let (response, code) = index.search_get("matchingStrategy=doggo").await;
let (response, code) = index.search_get("?matchingStrategy=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@@ -150,7 +150,8 @@ async fn bug_4640() {
"_geo": {
"lat": "45.4777599",
"lng": "9.1967508"
}
},
"_geoDistance": 0
},
{
"id": 1,

View File

@@ -150,6 +150,35 @@ async fn simple_search() {
snapshot!(response["semanticHitCount"], @"3");
}
#[actix_rt::test]
async fn limit_offset() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"0");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"1");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test]
async fn simple_search_hf() {
let server = Server::new().await;

View File

@@ -241,7 +241,7 @@ async fn similar_bad_offset() {
}
"###);
let (response, code) = index.similar_get("id=287947&offset=doggo").await;
let (response, code) = index.similar_get("?id=287947&offset=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -283,7 +283,7 @@ async fn similar_bad_limit() {
}
"###);
let (response, code) = index.similar_get("id=287946&limit=doggo").await;
let (response, code) = index.similar_get("?id=287946&limit=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -785,7 +785,7 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_get("retrieveVectors=").await;
let (response, code) = index.similar_get("?retrieveVectors=").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -796,7 +796,7 @@ async fn similar_bad_retrieve_vectors() {
}
"###);
let (response, code) = index.similar_get("retrieveVectors=doggo").await;
let (response, code) = index.similar_get("?retrieveVectors=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{

View File

@@ -2,6 +2,7 @@ mod errors;
mod webhook;
use meili_snap::insta::assert_json_snapshot;
use meili_snap::snapshot;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
@@ -738,11 +739,9 @@ async fn test_summarized_index_creation() {
async fn test_summarized_index_deletion() {
let server = Server::new().await;
let index = server.index("test");
index.delete().await;
index.wait_task(0).await;
let (task, _) = index.get_task(0).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###"
{
"uid": 0,
@@ -767,12 +766,34 @@ async fn test_summarized_index_deletion() {
"###);
// are the details correctly set when documents are actually deleted?
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
index.delete().await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
// /!\ We need to wait for the document addition to be processed; otherwise, if the test runs too slowly,
// both tasks may get autobatched and the deleted-documents count will be wrong.
let (ret, _code) =
index.add_documents(json!({ "id": 42, "content": "doggos & fluff" }), Some("id")).await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###"
{
"uid": 1,
"indexUid": "test",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###"
{
"uid": 2,
@@ -792,22 +813,25 @@ async fn test_summarized_index_deletion() {
"###);
// What happens when you delete an index that doesn't exist.
index.delete().await;
index.wait_task(2).await;
let (task, _) = index.get_task(2).await;
assert_json_snapshot!(task,
{ ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" },
let (ret, _code) = index.delete().await;
let task = index.wait_task(ret.uid()).await;
snapshot!(task,
@r###"
{
"uid": 2,
"uid": 3,
"indexUid": "test",
"status": "succeeded",
"status": "failed",
"type": "indexDeletion",
"canceledBy": null,
"details": {
"deletedDocuments": 1
"deletedDocuments": 0
},
"error": {
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",

View File

@@ -190,6 +190,285 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
index
}
#[actix_rt::test]
async fn user_provided_embeddings_error() {
let server = Server::new().await;
let index = generate_default_user_provided_documents(&server).await;
// First case, we forget to specify the `regenerate` field
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [0, 0, 0] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 2,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Second case, we don't specify anything
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": {}}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 3,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Third case, we specify something wrong in place of regenerate
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": "yes please" }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 4,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 5,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 6,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 7,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [0.1, [0.2, 0.3]] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 10,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, 0.2], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 11,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false, "embeddings": [[0.1, true], 0.3] }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": 12,
"indexUid": "doggo",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
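Summing up the matrix above: each embedder entry must be an object with a mandatory boolean `regenerate` and an optional `embeddings` that is either one vector or a list of vectors, with no mixing of the two nesting levels. For reference, the two shapes the checks accept, copied from the cases above that succeed:
// The two accepted `_vectors` shapes, per the succeeding cases above.
fn valid_vectors_payloads() -> (serde_json::Value, serde_json::Value) {
    let explicit = serde_json::json!(
        {"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [23, 0.1, -12], "regenerate": true }}}
    );
    let regenerate_only = serde_json::json!(
        {"id": 0, "name": "kefir", "_vectors": { "manual": { "regenerate": false }}}
    );
    (explicit, regenerate_only)
}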
#[actix_rt::test]
async fn clear_documents() {
let server = Server::new().await;
@@ -213,11 +492,11 @@ async fn clear_documents() {
// Make sure the arroy DB has been cleared
let (documents, _code) = index.search_post(json!({ "vector": [1, 1, 1] })).await;
snapshot!(json_string!(documents), @r###"
snapshot!(documents, @r###"
{
"hits": [],
"query": "",
"processingTimeMs": 0,
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0,
@@ -225,3 +504,85 @@ async fn clear_documents() {
}
"###);
}
#[actix_rt::test]
async fn add_remove_one_vector_4588() {
// https://github.com/meilisearch/meilisearch/issues/4588
let server = Server::new().await;
let index = server.index("doggo");
let (value, code) = server.set_features(json!({"vectorStore": true})).await;
snapshot!(code, @"200 OK");
snapshot!(value, @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false
}
"###);
let (response, code) = index
.update_settings(json!({
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3,
}
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task, name: "settings-processed");
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0] }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-added");
let documents = json!([
{"id": 0, "name": "kefir", "_vectors": { "manual": null }},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, name: "document-deleted");
let (documents, _code) = index.search_post(json!({"vector": [1, 1, 1] })).await;
snapshot!(documents, @r###"
{
"hits": [
{
"id": 0,
"name": "kefir"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1,
"semanticHitCount": 1
}
"###);
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"_vectors": {}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
}

View File

@@ -0,0 +1,19 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -0,0 +1,19 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 2,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -0,0 +1,23 @@
---
source: meilisearch/tests/vector/mod.rs
---
{
"uid": 0,
"indexUid": "doggo",
"status": "succeeded",
"type": "settingsUpdate",
"canceledBy": null,
"details": {
"embedders": {
"manual": {
"source": "userProvided",
"dimensions": 3
}
}
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}

View File

@@ -27,8 +27,7 @@ fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.6", default-features = false, features = [
"rayon",
"tempfile",
"rayon"
] }
heed = { version = "0.20.1", default-features = false, features = [
"serde-json",
@@ -79,7 +78,7 @@ hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls",
] }
tiktoken-rs = "0.5.8"
liquid = "0.26.4"
arroy = "0.3.1"
arroy = "0.4.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.9.7", features = ["json"] }

View File

@@ -166,7 +166,7 @@ pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserE
Some(s) => Ok(s.to_string()),
None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
},
Value::Number(number) if number.is_i64() => Ok(number.to_string()),
Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
content => Err(UserError::InvalidDocumentId { document_id: content }),
}
}
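This relaxation is what the `document_addition_with_huge_int_primary_key` test above exercises: `14630868576586246730` is larger than `i64::MAX`, so `is_i64()` rejected it, yet `serde_json` stores it losslessly as a `u64` and only true floats need to be refused. A small illustration of the `serde_json` semantics involved:
// Illustration: a huge integer id is a u64, neither an i64 nor an f64.
fn demo() {
    if let serde_json::Value::Number(n) = serde_json::json!(14630868576586246730u64) {
        assert!(!n.is_i64() && n.is_u64() && !n.is_f64());
        assert_eq!(n.to_string(), "14630868576586246730"); // lossless round-trip
    }
}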

View File

@@ -119,6 +119,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidVectorDimensions { expected: usize, found: usize },
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: String, value: Value },
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
#[error("{0}")]
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
@@ -281,8 +283,9 @@ impl From<arroy::Error> for Error {
arroy::Error::DatabaseFull
| arroy::Error::InvalidItemAppend
| arroy::Error::UnmatchingDistance { .. }
| arroy::Error::MissingNode
| arroy::Error::MissingMetadata => {
| arroy::Error::NeedBuild(_)
| arroy::Error::MissingKey { .. }
| arroy::Error::MissingMetadata(_) => {
Error::InternalError(InternalError::ArroyError(value))
}
}

View File

@@ -1610,7 +1610,7 @@ impl Index {
arroy::Reader::open(rtxn, k, self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata => Ok(None),
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e.into()),
})
.transpose()
@@ -1643,7 +1643,7 @@ impl Index {
let reader = arroy::Reader::open(rtxn, embedder_id | (i as u16), self.vector_arroy)
.map(Some)
.or_else(|e| match e {
arroy::Error::MissingMetadata => Ok(None),
arroy::Error::MissingMetadata(_) => Ok(None),
e => Err(e),
})
.transpose();
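Both hunks use the same `.map(Some).or_else(..)` idiom to turn arroy's `MissingMetadata(_)` into `Ok(None)` while propagating every other error; a self-contained sketch of the pattern (with a stand-in error type, since only part of arroy 0.4.0's API is visible in this diff):

```rust
#[derive(Debug, PartialEq)]
enum StoreError {
    MissingMetadata,
    Corrupt,
}

// "No metadata" means "no reader yet", so it becomes Ok(None);
// any other failure remains an error.
fn open_reader(result: Result<u32, StoreError>) -> Result<Option<u32>, StoreError> {
    result.map(Some).or_else(|e| match e {
        StoreError::MissingMetadata => Ok(None),
        e => Err(e),
    })
}

fn main() {
    assert_eq!(open_reader(Ok(7)), Ok(Some(7)));
    assert_eq!(open_reader(Err(StoreError::MissingMetadata)), Ok(None));
    assert_eq!(open_reader(Err(StoreError::Corrupt)), Err(StoreError::Corrupt));
}
```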

View File

@@ -6,9 +6,11 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::DocumentId;
use crate::{CboRoaringBitmapCodec, DocumentId};
/// Call the given closure on the facet distribution of the candidate documents.
///
@@ -31,12 +33,9 @@ pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
@@ -75,11 +74,8 @@ where
// Represents the list of keys that we must explore.
let mut heap = BinaryHeap::new();
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
)?;
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
@@ -92,7 +88,10 @@ where
if key.field_id != field_id {
break;
}
let intersection = value.bitmap & candidates;
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let count = intersection.len();
if count != 0 {
heap.push(LevelEntry {
@@ -121,7 +120,10 @@ where
if key.field_id != field_id {
break;
}
let intersection = value.bitmap & candidates;
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let count = intersection.len();
if count != 0 {
heap.push(LevelEntry {
@@ -146,7 +148,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
field_id: u16,
callback: CB,
}
@@ -171,7 +173,10 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = value.bitmap & candidates;
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
if !docids_in_common.is_empty() {
let any_docid_in_common = docids_in_common.min().unwrap();
match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)?
@@ -205,7 +210,10 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = value.bitmap & candidates;
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
if !docids_in_common.is_empty() {
let cf = self.iterate(
&docids_in_common,
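The refactor replaces `value.bitmap & candidates`, which decoded the whole stored bitmap up front, with `CboRoaringBitmapCodec::intersection_with_serialized`, which intersects directly against the raw bytes. For contrast, a sketch of the eager baseline it improves on, using plain `roaring` (the lazy variant's internals are not shown in this diff):

```rust
use roaring::RoaringBitmap;

// Eager version: fully deserialize the stored bitmap, then intersect.
// The lazy codec avoids materializing `stored` before intersecting.
fn eager_intersection(
    bitmap_bytes: &[u8],
    candidates: &RoaringBitmap,
) -> std::io::Result<RoaringBitmap> {
    let stored = RoaringBitmap::deserialize_from(bitmap_bytes)?;
    Ok(stored & candidates)
}
```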

View File

@@ -17,6 +17,7 @@ struct ScoreWithRatioResult {
type ScoreWithRatio = (Vec<ScoreDetails>, f32);
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
fn compare_scores(
&(ref left_scores, left_ratio): &ScoreWithRatio,
&(ref right_scores, right_ratio): &ScoreWithRatio,
@@ -84,6 +85,7 @@ impl ScoreWithRatioResult {
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
fn merge(
vector_results: Self,
keyword_results: Self,
@@ -150,6 +152,7 @@ impl ScoreWithRatioResult {
}
impl<'a> Search<'a> {
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
// TODO: find a classier way to achieve that than to reset vector and query params
// create separate keyword and semantic searches
@@ -178,22 +181,25 @@ impl<'a> Search<'a> {
// completely skip semantic search if the results of the keyword search are good enough
if self.results_good_enough(&keyword_results, semantic_ratio) {
return Ok((keyword_results, Some(0)));
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
}
// no vector search against placeholder search
let Some(query) = search.query.take() else {
return Ok((keyword_results, Some(0)));
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
// no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder }) = semantic else {
return Ok((keyword_results, Some(0)));
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
let vector_query = match vector {
Some(vector_query) => vector_query,
None => {
// attempt to embed the vector
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();
match embedder.embed_one(query) {
Ok(embedding) => embedding,
Err(error) => {
@@ -239,3 +245,44 @@ impl<'a> Search<'a> {
true
}
}
fn return_keyword_results(
limit: usize,
offset: usize,
SearchResult {
matching_words,
candidates,
mut documents_ids,
mut document_scores,
degraded,
used_negative_operator,
}: SearchResult,
) -> (SearchResult, Option<u32>) {
let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
// technically redundant because documents_ids.len() == document_scores.len(),
// defensive programming
offset >= document_scores.len()
{
(vec![], vec![])
} else {
// PANICS: offset < len
documents_ids.rotate_left(offset);
documents_ids.truncate(limit);
// PANICS: offset < len
document_scores.rotate_left(offset);
document_scores.truncate(limit);
(documents_ids, document_scores)
};
(
SearchResult {
matching_words,
candidates,
documents_ids,
document_scores,
degraded,
used_negative_operator,
},
Some(0),
)
}
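The `rotate_left` + `truncate` pair above applies `offset`/`limit` in place without reallocating; extracted as a standalone sketch:

```rust
// In-place pagination: drop `offset` items from the front, keep at most `limit`.
fn paginate<T>(mut items: Vec<T>, offset: usize, limit: usize) -> Vec<T> {
    if offset >= items.len() {
        return Vec::new();
    }
    items.rotate_left(offset); // cannot panic: offset < items.len()
    items.truncate(limit);
    items
}

fn main() {
    assert_eq!(paginate(vec![10, 20, 30, 40, 50], 1, 2), vec![20, 30]);
    assert_eq!(paginate(vec![10, 20], 5, 2), Vec::<i32>::new());
}
```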

View File

@@ -371,4 +371,28 @@ mod test {
assert_eq!(documents_ids, vec![1]);
}
#[cfg(feature = "korean")]
#[test]
fn test_hangul_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 1, "title": "김밥먹을래。" },
{ "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
]))
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
search.query("김밥");
let SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
}
}

View File

@@ -213,9 +213,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
continue;
}
let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
let entered = span.enter();
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx,
logger,
@@ -225,7 +222,6 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
back!();
continue;
};
drop(entered);
ranking_rule_scores.push(next_bucket.score);

View File

@@ -27,6 +27,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
"exact_attribute".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -38,6 +39,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
@@ -51,6 +53,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
Ok(output)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
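This file and the following ones apply the same one-line change to every ranking-rule entry point; in isolation, such an instrumented function looks like the sketch below (the function body and the `search::example` target are made up for illustration):

```rust
use tracing::instrument;

// `skip_all` keeps the arguments out of the span fields; `target` lets a
// profiling directive such as `search::=trace` match all of these spans.
#[instrument(level = "trace", skip_all, target = "search::example")]
fn next_bucket(universe: &[u32]) -> Option<u32> {
    universe.first().copied()
}
```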

View File

@@ -209,6 +209,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
"geo_sort".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -234,6 +235,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
#[allow(clippy::only_used_in_recursion)]
fn next_bucket(
&mut self,
@@ -285,6 +287,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
self.next_bucket(ctx, logger, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger<Q>) {
// we do not reset the rtree here, it could be used in a next iteration
self.query = None;

View File

@@ -127,6 +127,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
fn id(&self) -> String {
self.id.clone()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -209,6 +211,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -358,6 +361,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,

View File

@@ -212,7 +212,7 @@ fn resolve_maximally_reduced_query_graph(
Ok(docids)
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
fn resolve_universe(
ctx: &mut SearchContext,
initial_universe: &RoaringBitmap,
@@ -229,7 +229,7 @@ fn resolve_universe(
)
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
fn resolve_negative_words(
ctx: &mut SearchContext,
negative_words: &[Word],
@@ -243,7 +243,7 @@ fn resolve_negative_words(
Ok(negative_bitmap)
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
fn resolve_negative_phrases(
ctx: &mut SearchContext,
negative_phrases: &[LocatedQueryTerm],
@@ -548,7 +548,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search")]
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
@@ -620,7 +620,7 @@ pub fn execute_vector_search(
}
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
#[tracing::instrument(level = "trace", skip_all, target = "search::main")]
pub fn execute_search(
ctx: &mut SearchContext,
query: Option<&str>,

View File

@@ -44,6 +44,7 @@ fn compute_docids(
impl RankingRuleGraphTrait for ExactnessGraph {
type Condition = ExactnessCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
@@ -71,6 +72,7 @@ impl RankingRuleGraphTrait for ExactnessGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn build_edges(
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
@@ -86,6 +88,7 @@ impl RankingRuleGraphTrait for ExactnessGraph {
Ok(vec![(0, exact_condition), (dest_node.term_ids.len() as u32, skip_condition)])
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exactness")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank))
}

View File

@@ -20,6 +20,7 @@ pub enum FidGraph {}
impl RankingRuleGraphTrait for FidGraph {
type Condition = FidCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
@@ -44,6 +45,7 @@ impl RankingRuleGraphTrait for FidGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn build_edges(
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
@@ -101,6 +103,7 @@ impl RankingRuleGraphTrait for FidGraph {
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::fid")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Fid(rank)
}

View File

@@ -20,6 +20,7 @@ pub enum PositionGraph {}
impl RankingRuleGraphTrait for PositionGraph {
type Condition = PositionCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
@@ -44,6 +45,7 @@ impl RankingRuleGraphTrait for PositionGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn build_edges(
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
@@ -117,6 +119,7 @@ impl RankingRuleGraphTrait for PositionGraph {
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::position")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Position(rank)
}

View File

@@ -21,6 +21,7 @@ pub enum ProximityGraph {}
impl RankingRuleGraphTrait for ProximityGraph {
type Condition = ProximityCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
@@ -29,6 +30,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
compute_docids::compute_docids(ctx, condition, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn build_edges(
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
@@ -38,6 +40,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
build::build_edges(ctx, conditions_interner, source_term, dest_term)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::proximity")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Proximity(rank)
}

View File

@@ -19,6 +19,7 @@ pub enum TypoGraph {}
impl RankingRuleGraphTrait for TypoGraph {
type Condition = TypoCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
@@ -37,6 +38,7 @@ impl RankingRuleGraphTrait for TypoGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn build_edges(
ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
@@ -77,6 +79,7 @@ impl RankingRuleGraphTrait for TypoGraph {
Ok(edges)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::typo")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Typo(score_details::Typo::from_rank(rank))
}

View File

@@ -18,6 +18,7 @@ pub enum WordsGraph {}
impl RankingRuleGraphTrait for WordsGraph {
type Condition = WordsCondition;
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn resolve_condition(
ctx: &mut SearchContext,
condition: &Self::Condition,
@@ -36,6 +37,7 @@ impl RankingRuleGraphTrait for WordsGraph {
})
}
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn build_edges(
_ctx: &mut SearchContext,
conditions_interner: &mut DedupInterner<Self::Condition>,
@@ -45,6 +47,7 @@ impl RankingRuleGraphTrait for WordsGraph {
Ok(vec![(0, conditions_interner.insert(WordsCondition { term: to_term.clone() }))])
}
#[tracing::instrument(level = "trace", skip_all, target = "search::words")]
fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Words(score_details::Words::from_rank(rank))
}

View File

@@ -88,6 +88,8 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc" })
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -186,6 +188,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
@@ -211,6 +214,7 @@ impl<'ctx, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query> for Sort<'ctx,
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::sort")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,

View File

@@ -73,6 +73,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
"vector_sort".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -89,6 +90,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
}
#[allow(clippy::only_used_in_recursion)]
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
@@ -139,6 +141,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for VectorSort<Q> {
self.next_bucket(ctx, _logger, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::vector_sort")]
fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger<Q>) {
self.query = None;
}

View File

@@ -290,7 +290,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
regenerate_if_prompt_changed(
obkv,
(old_prompt, prompt),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
)?
} else {
// we can simply ignore user provided vectors as they are not regenerated and are
@@ -306,7 +306,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt,
(add_to_user_provided, remove_from_user_provided),
(old, new),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
document_id,
)?,
};

View File

@@ -11,7 +11,7 @@ mod extract_word_position_docids;
use std::fs::File;
use std::io::BufReader;
use std::sync::Arc;
use std::sync::{Arc, OnceLock};
use crossbeam_channel::Sender;
use rayon::prelude::*;
@@ -32,7 +32,7 @@ use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
use super::{helpers, TypedChunk};
use crate::index::IndexEmbeddingConfig;
use crate::update::settings::InnerIndexSettingsDiff;
use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
/// Extract data for each database from the obkv documents in parallel.
/// Send the data in grenad files over the provided Sender.
@@ -207,6 +207,18 @@ fn run_extraction_task<FE, FS, M>(
})
}
fn request_threads() -> &'static ThreadPoolNoAbort {
static REQUEST_THREADS: OnceLock<ThreadPoolNoAbort> = OnceLock::new();
REQUEST_THREADS.get_or_init(|| {
ThreadPoolNoAbortBuilder::new()
.num_threads(crate::vector::REQUEST_PARALLELISM)
.thread_name(|index| format!("embedding-request-{index}"))
.build()
.unwrap()
})
}
/// Extract chunked data and send it into lmdb_writer_sx sender:
/// - documents
fn send_original_documents_data(
@@ -219,11 +231,6 @@ fn send_original_documents_data(
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
let request_threads = ThreadPoolNoAbortBuilder::new()
.num_threads(crate::vector::REQUEST_PARALLELISM)
.thread_name(|index| format!("embedding-request-{index}"))
.build()?;
let index_vectors = (settings_diff.reindex_vectors() || !settings_diff.settings_update_only())
// no point in indexing vectors without embedders
&& (!settings_diff.new.embedding_configs.inner_as_ref().is_empty());
@@ -256,7 +263,7 @@ fn send_original_documents_data(
prompts,
indexer,
embedder.clone(),
&request_threads,
request_threads(),
) {
Ok(results) => Some(results),
Err(error) => {
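Moving the pool behind `OnceLock` means it is built once on first use and shared across runs instead of being rebuilt per call; the same idiom in a generic form (using a plain `rayon` pool for illustration, not the crate's `ThreadPoolNoAbort`):

```rust
use std::sync::OnceLock;

// Lazily-initialized process-wide pool: constructed on first access,
// the same `&'static` reference is returned afterwards.
fn request_threads() -> &'static rayon::ThreadPool {
    static POOL: OnceLock<rayon::ThreadPool> = OnceLock::new();
    POOL.get_or_init(|| {
        rayon::ThreadPoolBuilder::new()
            .num_threads(4) // stand-in for crate::vector::REQUEST_PARALLELISM
            .thread_name(|index| format!("embedding-request-{index}"))
            .build()
            .expect("failed to build the embedding request pool")
    })
}
```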

View File

@@ -325,7 +325,7 @@ where
let documents_chunk_size = match self.indexer_config.documents_chunk_size {
Some(chunk_size) => chunk_size,
None => {
let default_chunk_size = 1024 * 1024 * 4; // 4MiB
let default_chunk_size = 1024 * 1024 * 1024 * 2; // 2 GiB
let min_chunk_size = 1024 * 512; // 512KiB
// compute the chunk size from the number of available threads and the input data size.
@@ -547,10 +547,11 @@ where
pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let writer = arroy::Writer::new(vector_arroy, k, dimension);
if writer.is_empty(wtxn)? {
if writer.need_build(wtxn)? {
writer.build(wtxn, &mut rng, None)?;
} else if writer.is_empty(wtxn)? {
break;
}
writer.build(wtxn, &mut rng, None)?;
}
Result::Ok(())
})
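The reordered loop builds a writer whenever it has pending work and stops scanning the embedder's key range at the first empty writer; its control flow, reduced to a sketch with a stand-in writer type:

```rust
// Stand-in for arroy 0.4.0 writers: `need_build` is true when items changed
// since the last build, `is_empty` when the tree holds no items at all.
struct Writer {
    need_build: bool,
    is_empty: bool,
}

fn build_range(writers: &[Writer]) -> usize {
    let mut built = 0;
    for writer in writers {
        if writer.need_build {
            built += 1; // writer.build(wtxn, &mut rng, None)?
        } else if writer.is_empty {
            break; // past the last used index in this embedder's range
        }
        // otherwise: already built and non-empty, nothing to do
    }
    built
}

fn main() {
    let writers = [
        Writer { need_build: true, is_empty: false },
        Writer { need_build: false, is_empty: false },
        Writer { need_build: false, is_empty: true }, // scan stops here
    ];
    assert_eq!(build_range(&writers), 1);
}
```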

View File

@@ -1,5 +1,6 @@
use std::collections::{BTreeMap, BTreeSet};
use deserr::{take_cf_content, DeserializeError, Deserr, Sequence};
use obkv::KvReader;
use serde_json::{from_slice, Value};
@@ -10,13 +11,44 @@ use crate::{DocumentId, FieldId, InternalError, UserError};
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[derive(serde::Serialize, Debug)]
#[serde(untagged)]
pub enum Vectors {
ImplicitlyUserProvided(VectorOrArrayOfVectors),
Explicit(ExplicitVectors),
}
impl<E: DeserializeError> Deserr<E> for Vectors {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: deserr::ValuePointerRef,
) -> Result<Self, E> {
match value {
deserr::Value::Sequence(_) | deserr::Value::Null => {
Ok(Vectors::ImplicitlyUserProvided(VectorOrArrayOfVectors::deserialize_from_value(
value, location,
)?))
}
deserr::Value::Map(_) => {
Ok(Vectors::Explicit(ExplicitVectors::deserialize_from_value(value, location)?))
}
value => Err(take_cf_content(E::error(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
accepted: &[
deserr::ValueKind::Sequence,
deserr::ValueKind::Map,
deserr::ValueKind::Null,
],
},
location,
))),
}
}
}
impl Vectors {
pub fn must_regenerate(&self) -> bool {
match self {
@@ -37,9 +69,11 @@ impl Vectors {
}
}
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[derive(serde::Serialize, Deserr, Debug)]
#[serde(rename_all = "camelCase")]
pub struct ExplicitVectors {
#[serde(default)]
#[deserr(default)]
pub embeddings: Option<VectorOrArrayOfVectors>,
pub regenerate: bool,
}
@@ -149,13 +183,20 @@ impl ParsedVectorsDiff {
pub struct ParsedVectors(pub BTreeMap<String, Vectors>);
impl<E: DeserializeError> Deserr<E> for ParsedVectors {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: deserr::ValuePointerRef,
) -> Result<Self, E> {
let value = <BTreeMap<String, Vectors>>::deserialize_from_value(value, location)?;
Ok(ParsedVectors(value))
}
}
impl ParsedVectors {
pub fn from_bytes(value: &[u8]) -> Result<Self, Error> {
let Ok(value) = from_slice(value) else {
let value = from_slice(value).map_err(Error::InternalSerdeJson)?;
return Err(Error::InvalidMap(value));
};
Ok(ParsedVectors(value))
let value: serde_json::Value = from_slice(value).map_err(Error::InternalSerdeJson)?;
deserr::deserialize(value).map_err(|error| Error::InvalidEmbedderConf { error })
}
pub fn retain_not_embedded_vectors(&mut self, embedders: &BTreeSet<String>) {
@@ -165,6 +206,7 @@ impl ParsedVectors {
pub enum Error {
InvalidMap(Value),
InvalidEmbedderConf { error: deserr::errors::JsonError },
InternalSerdeJson(serde_json::Error),
}
@@ -174,6 +216,12 @@ impl Error {
Error::InvalidMap(value) => {
crate::Error::UserError(UserError::InvalidVectorsMapType { document_id, value })
}
Error::InvalidEmbedderConf { error } => {
crate::Error::UserError(UserError::InvalidVectorsEmbedderConf {
document_id,
error,
})
}
Error::InternalSerdeJson(error) => {
crate::Error::InternalError(InternalError::SerdeJson(error))
}
@@ -194,13 +242,84 @@ fn to_vector_map(
}
/// Represents either a vector or an array of multiple vectors.
#[derive(serde::Serialize, serde::Deserialize, Debug)]
#[derive(serde::Serialize, Debug)]
#[serde(transparent)]
pub struct VectorOrArrayOfVectors {
#[serde(with = "either::serde_untagged_optional")]
inner: Option<either::Either<Vec<Embedding>, Embedding>>,
}
impl<E: DeserializeError> Deserr<E> for VectorOrArrayOfVectors {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: deserr::ValuePointerRef,
) -> Result<Self, E> {
match value {
deserr::Value::Null => Ok(VectorOrArrayOfVectors { inner: None }),
deserr::Value::Sequence(seq) => {
let mut iter = seq.into_iter();
match iter.next().map(|v| v.into_value()) {
None => {
// Given the strange way serde serializes the `Either`, we must return the left part,
// otherwise it would be interpreted as if we had returned [[]]
Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Left(Vec::new())) })
}
Some(val @ deserr::Value::Sequence(_)) => {
let first = Embedding::deserialize_from_value(val, location.push_index(0))?;
let mut collect = vec![first];
let mut tail = iter
.enumerate()
.map(|(i, v)| {
Embedding::deserialize_from_value(
v.into_value(),
location.push_index(i + 1),
)
})
.collect::<Result<Vec<_>, _>>()?;
collect.append(&mut tail);
Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Left(collect)) })
}
Some(
val @ deserr::Value::Integer(_)
| val @ deserr::Value::NegativeInteger(_)
| val @ deserr::Value::Float(_),
) => {
let first = <f32>::deserialize_from_value(val, location.push_index(0))?;
let mut embedding = iter
.enumerate()
.map(|(i, v)| {
<f32>::deserialize_from_value(
v.into_value(),
location.push_index(i + 1),
)
})
.collect::<Result<Vec<_>, _>>()?;
embedding.insert(0, first);
Ok(VectorOrArrayOfVectors { inner: Some(either::Either::Right(embedding)) })
}
Some(value) => Err(take_cf_content(E::error(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
accepted: &[deserr::ValueKind::Sequence, deserr::ValueKind::Float],
},
location.push_index(0),
))),
}
}
value => Err(take_cf_content(E::error(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
accepted: &[deserr::ValueKind::Sequence, deserr::ValueKind::Null],
},
location,
))),
}
}
}
impl VectorOrArrayOfVectors {
pub fn into_array_of_vectors(self) -> Option<Vec<Embedding>> {
match self.inner? {
@@ -234,15 +353,19 @@ impl From<Vec<Embedding>> for VectorOrArrayOfVectors {
mod test {
use super::VectorOrArrayOfVectors;
fn embedding_from_str(s: &str) -> Result<VectorOrArrayOfVectors, deserr::errors::JsonError> {
let value: serde_json::Value = serde_json::from_str(s).unwrap();
deserr::deserialize(value)
}
#[test]
fn array_of_vectors() {
let null: VectorOrArrayOfVectors = serde_json::from_str("null").unwrap();
let empty: VectorOrArrayOfVectors = serde_json::from_str("[]").unwrap();
let one: VectorOrArrayOfVectors = serde_json::from_str("[0.1]").unwrap();
let two: VectorOrArrayOfVectors = serde_json::from_str("[0.1, 0.2]").unwrap();
let one_vec: VectorOrArrayOfVectors = serde_json::from_str("[[0.1, 0.2]]").unwrap();
let two_vecs: VectorOrArrayOfVectors =
serde_json::from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap();
let null = embedding_from_str("null").unwrap();
let empty = embedding_from_str("[]").unwrap();
let one = embedding_from_str("[0.1]").unwrap();
let two = embedding_from_str("[0.1, 0.2]").unwrap();
let one_vec = embedding_from_str("[[0.1, 0.2]]").unwrap();
let two_vecs = embedding_from_str("[[0.1, 0.2], [0.3, 0.4]]").unwrap();
insta::assert_json_snapshot!(null.into_array_of_vectors(), @"null");
insta::assert_json_snapshot!(empty.into_array_of_vectors(), @"[]");
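The hand-written `Deserr` impls above dispatch on the JSON value kind: sequences and null take the implicit user-provided path, maps the explicit one. The same dispatch, sketched with `serde_json` instead of `deserr`'s value model (the types here are illustrative, not the crate's):

```rust
use serde_json::{json, Value};

#[derive(Debug, PartialEq)]
enum VectorsKind {
    ImplicitlyUserProvided, // bare vector, array of vectors, or null
    Explicit,               // object with `embeddings` / `regenerate`
}

// Mirrors `Deserr for Vectors`: sequence or null -> implicit,
// map -> explicit, anything else -> IncorrectValueKind-style error.
fn classify(value: &Value) -> Result<VectorsKind, String> {
    match value {
        Value::Array(_) | Value::Null => Ok(VectorsKind::ImplicitlyUserProvided),
        Value::Object(_) => Ok(VectorsKind::Explicit),
        other => Err(format!("expected a sequence, a map, or null, got `{other}`")),
    }
}

fn main() {
    assert_eq!(classify(&json!([0.1, 0.2])), Ok(VectorsKind::ImplicitlyUserProvided));
    assert_eq!(classify(&json!({ "regenerate": true })), Ok(VectorsKind::Explicit));
    assert!(classify(&json!("oops")).is_err());
}
```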

rust-toolchain.toml Normal file
View File

@@ -0,0 +1,3 @@
[toolchain]
channel = "1.75.0"
components = ["clippy"]

View File

@@ -0,0 +1,171 @@
{
"name": "search-movies-subset-hf-embeddings",
"run_count": 2,
"target": "search::=trace",
"extra_cli_args": [
"--max-indexing-threads=4"
],
"assets": {
"movies-100.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies-100.json",
"sha256": "d215e395e4240f12f03b8f1f68901eac82d9e7ded5b462cbf4a6b8efde76c6c6"
}
},
"precommands": [
{
"route": "experimental-features",
"method": "PATCH",
"body": {
"inline": {
"vectorStore": true
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
],
"searchCutoffMs": 15000
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"embedders": {
"default": {
"source": "huggingFace",
"documentTemplate": "A movie titled '{{doc.title}}' whose description starts with {{doc.overview|truncatewords: 20}}"
}
}
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies-100.json"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "puppy cute comforting movie",
"limit": 100,
"hybrid": {
"semanticRatio": 0.1
}
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "puppy cute comforting movie",
"limit": 100,
"hybrid": {
"semanticRatio": 0.5
}
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "puppy cute comforting movie",
"limit": 100,
"hybrid": {
"semanticRatio": 0.9
}
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "puppy cute comforting movie",
"limit": 100,
"hybrid": {
"semanticRatio": 1.0
}
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "shrek",
"limit": 100,
"hybrid": {
"semanticRatio": 1.0
}
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "shrek",
"limit": 100,
"hybrid": {
"semanticRatio": 0.5
}
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "shrek",
"limit": 100,
"hybrid": {
"semanticRatio": 0.1
}
}
},
"synchronous": "WaitForResponse"
}
]
}

View File

@@ -0,0 +1,94 @@
{
"name": "search-sortable-movies.json",
"run_count": 10,
"target": "search::=trace",
"extra_cli_args": [],
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
],
"searchCutoffMs": 15000
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "",
"limit": 100,
"filter": "genres IN [action, comedy, adventure] AND release_date = 233366400"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "Batman returns",
"limit": 100,
"filter": "genres IN [action, comedy, adventure] AND release_date > 233366400"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "the",
"limit": 100,
"filter": "genres IN [animation, comedy, adventure] AND release_date < 233366400"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "t",
"limit": 100,
"filter": "genres = Family AND release_date <= 233366400 OR release_date >= 1054252800"
}
},
"synchronous": "WaitForResponse"
}
]
}

View File

@@ -0,0 +1,340 @@
{
"name": "search-geosort.jsonl_1M",
"run_count": 3,
"target": "search::=trace",
"extra_cli_args": [],
"assets": {
"smol-all-countries-100k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-100k.jsonl",
"sha256": "d00924689abc02d09ec4667cc5a18364ff7bc236bad51367f34b9184b945ece3"
},
"smol-all-countries-200k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-200k.jsonl",
"sha256": "2a215b43b35d596d9da4f1071deab9002a93602e6dbf1308fba53eb89d9c5a9e"
},
"smol-all-countries-300k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-300k.jsonl",
"sha256": "91d94d78eeb10d631557a5ccf775e74a41d14ccaff4d7121dd90c7aa35534f2b"
},
"smol-all-countries-400k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-400k.jsonl",
"sha256": "ee883a353b571f35f4abb79b95cfa628f3f1c582919dd658a388b220f97fe035"
},
"smol-all-countries-500k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-500k.jsonl",
"sha256": "5be254ce4c50db12b7f1795859b8bbdcbc2ec22bccb3a1898899bd4c4765a1bf"
},
"smol-all-countries-600k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-600k.jsonl",
"sha256": "3aa91afe3361f5185c142125dfcdc8ddcb7d39fdeeeb4f5e67439511905e9826"
},
"smol-all-countries-700k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-700k.jsonl",
"sha256": "5a864a1e9d89736147a8da594e2cbce5264979326d38655d0945d8447f3867b3"
},
"smol-all-countries-800k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-800k.jsonl",
"sha256": "d85eb9c85a612fd7b77623e162ecd0f8265ba3be97054e26b9cff7c48735809b"
},
"smol-all-countries-900k.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-900k.jsonl",
"sha256": "4fd6662e8b9bfcd9fad7d5dcd691a47ec985d810d1e340465c056ee84e9c40f3"
},
"smol-all-countries-1M.jsonl": {
"local_location": null,
"format": "NdJson",
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/smol-all-countries/smol-all-countries-1M.jsonl",
"sha256": "585a713b489b154b94e7c07707bd369f888c7fe24eb90bf604578d7adf51a9e6"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"geonameid",
"name",
"asciiname",
"alternatenames",
"_geo",
"population"
],
"searchableAttributes": [
"name",
"alternatenames",
"elevation"
],
"filterableAttributes": [
"_geo",
"population",
"elevation"
],
"sortableAttributes": [
"_geo",
"population",
"elevation"
],
"searchCutoffMs": 15000
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-100k.jsonl"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-200k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-300k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-400k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-500k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-600k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-700k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-800k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-900k.jsonl"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "smol-all-countries-1M.jsonl"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "",
"limit": 100
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"sort": [
"_geoPoint(50.62999333378238, 3.086269263384099):asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"sort": [
"_geoPoint(50.62999333378238, 3.086269263384099):desc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"sort": [
"_geoPoint(35.749512532692144, 139.61664952543356):asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"sort": [
"_geoPoint(35.749512532692144, 139.61664952543356):desc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"sort": [
"_geoPoint(-48.87561645055408, -123.39275749319793):asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"sort": [
"_geoPoint(-48.87561645055408, -123.39275749319793):desc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"filter": "_geoRadius(50.62999333378238, 3.086269263384099, 100000)"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"filter": "_geoRadius(50.62999333378238, 3.086269263384099, 1000)"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"filter": "_geoRadius(35.749512532692144, 139.61664952543356, 100000)"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"filter": "_geoRadius(35.749512532692144, 139.61664952543356, 1000)"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"filter": "_geoRadius(-48.87561645055408, -123.39275749319793, 100000)"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"filter": "_geoRadius(-48.87561645055408, -123.39275749319793, 1000)"
}
},
"synchronous": "WaitForResponse"
}
]
}

View File

@@ -0,0 +1,255 @@
{
"name": "search-hackernews.ndjson_1M",
"run_count": 3,
"target": "search::=trace",
"extra_cli_args": [],
"assets": {
"hackernews-100_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
"sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213"
},
"hackernews-200_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
"sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
},
"hackernews-300_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
"sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
},
"hackernews-400_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
"sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
},
"hackernews-500_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
"sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
},
"hackernews-600_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
"sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
},
"hackernews-700_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
"sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
},
"hackernews-800_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
"sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
},
"hackernews-900_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
"sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
},
"hackernews-1_000_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"title",
"by",
"score",
"time"
],
"searchableAttributes": [
"title"
],
"filterableAttributes": [
"by"
],
"sortableAttributes": [
"score",
"time"
],
"rankingRules": [
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"searchCutoffMs": 15000
}
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-100_000.ndjson"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-200_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-300_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-400_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-500_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-600_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-700_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-800_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-900_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-1_000_000.ndjson"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "rust meilisearch",
"limit": 100,
"filter": "by = tpayet",
"sort": [
"score:desc",
"time:asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "rust meilisearch",
"limit": 100,
"filter": "NOT by = tpayet",
"sort": [
"score:desc",
"time:asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "meilisearch",
"limit": 100,
"sort": [
"score:desc",
"time:desc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "rust",
"limit": 100,
"filter": "by = dang",
"sort": [
"score:desc",
"time:asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "combinator YC",
"limit": 100,
"filter": "by = dang",
"sort": [
"score:desc",
"time:asc"
]
}
},
"synchronous": "WaitForResponse"
}
]
}

View File

@@ -0,0 +1,90 @@
{
"name": "search-movies.json",
"run_count": 10,
"target": "search::=trace",
"extra_cli_args": [],
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
],
"searchCutoffMs": 15000
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "",
"limit": 100
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "Batman returns",
"limit": 100
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"q": "the"
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"limit": 100,
"q": "t"
}
},
"synchronous": "WaitForResponse"
}
]
}

View File

@@ -0,0 +1,110 @@
{
"name": "search-sortable-movies.json",
"run_count": 10,
"target": "search::=trace",
"extra_cli_args": [],
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"precommands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"sort",
"words",
"typo",
"proximity",
"attribute",
"exactness"
],
"searchCutoffMs": 15000
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"synchronous": "WaitForTask"
}
],
"commands": [
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "",
"limit": 100,
"sort": [
"release_date:asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "Batman returns",
"limit": 100,
"sort": [
"release_date:desc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "the",
"limit": 100,
"sort": [
"release_date:asc"
]
}
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/search",
"method": "POST",
"body": {
"inline": {
"q": "t",
"limit": 100,
"sort": [
"release_date:asc"
]
}
},
"synchronous": "WaitForResponse"
}
]
}

View File

@@ -23,6 +23,8 @@ pub struct Workload {
pub extra_cli_args: Vec<String>,
pub assets: BTreeMap<String, Asset>,
#[serde(default)]
pub target: String,
#[serde(default)]
pub precommands: Vec<super::command::Command>,
pub commands: Vec<super::command::Command>,
}
@@ -54,7 +56,7 @@ async fn run_commands(
let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");
let report_handle = start_report(logs_client, trace_filename).await?;
let report_handle = start_report(logs_client, trace_filename, &workload.target).await?;
for batch in workload
.commands
@@ -160,7 +162,11 @@ async fn execute_run(
async fn start_report(
logs_client: &Client,
filename: String,
target: &str,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
const DEFAULT_TARGET: &str = "indexing::=trace";
let target = if target.is_empty() { DEFAULT_TARGET } else { target };
let report_file = std::fs::File::options()
.create(true)
.truncate(true)
@@ -174,7 +180,7 @@ async fn start_report(
.post("")
.json(&json!({
"mode": "profile",
"target": "indexing::=trace"
"target": target,
}))
.send()
.await
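Putting the two hunks together: `#[serde(default)]` makes an omitted `target` deserialize to the empty string, and `start_report` swaps that for `indexing::=trace`, so only the new search workloads need to set `"target": "search::=trace"` explicitly. A sketch of the resolution step:

```rust
// Resolve the profiling target the way `start_report` does: an empty
// (i.e. omitted) workload `target` falls back to the indexing default.
fn effective_target(target: &str) -> &str {
    const DEFAULT_TARGET: &str = "indexing::=trace";
    if target.is_empty() {
        DEFAULT_TARGET
    } else {
        target
    }
}

fn main() {
    assert_eq!(effective_target(""), "indexing::=trace");
    assert_eq!(effective_target("search::=trace"), "search::=trace");
}
```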