Compare commits

..

55 Commits

Author SHA1 Message Date
Louis Dureuil
b7ed3308bb Update grenad 2024-04-09 09:31:23 +02:00
Louis Dureuil
579a96adc7 Actually abort in case of corrupted index 2024-04-04 11:02:54 +02:00
Louis Dureuil
e6ff45e3b9 Changes for tracking issue 138
- create a snapshot as well as a dump
- only detect inconsistencies in the facet -> document direction
- mark index as corrupted after creating snapshot and dump
- always abort tasks on indexes marked as corrupted
2024-04-04 10:22:49 +02:00
Louis Dureuil
e4f8ee00c8 check consistency, create a dump and send push event for failed checks 2024-03-25 16:32:50 +01:00
meili-bors[bot]
d2f77e88bd Merge #4479
4479: Skip reindexing when modifying unknown faceted fields r=dureuill a=Kerollmops

This PR improves Meilisearch's decision to reindex when a faceted field is added to the settings but not a single document contains this field. It is effectively a waste of time to reindex documents when none of them contains the newly faceted field.

This is related to a conversation [we had with our biggest customer (internal link)](https://discord.com/channels/1006923006964154428/1101213808627830794/1217112918857089187). They have 170 million documents, so reindexing that many would be problematic.
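
For intuition, here is a minimal, hypothetical sketch of the decision (illustrative names only; the real logic lives in milli's settings update):

```rust
// Hypothetical illustration, not Meilisearch's actual code: only reindex if
// one of the fields touched by the settings change actually appears in the
// documents already indexed.
use std::collections::HashSet;

fn must_reindex(
    fields_present_in_documents: &HashSet<String>,
    old_faceted: &HashSet<String>,
    new_faceted: &HashSet<String>,
) -> bool {
    new_faceted
        .symmetric_difference(old_faceted) // fields added or removed by the update
        .any(|field| fields_present_in_documents.contains(field))
}
```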

---

The Docker image can be pulled with the following command. You can follow the image's build progress [on the GitHub CI page](https://github.com/meilisearch/meilisearch/actions/runs/8251688778).

```
docker pull getmeili/meilisearch:prototype-no-reindex-unknown-fields-0
```

Here is the hand-made test showing that modifying an unknown filterable attribute, here `lol`, doesn't trigger a reindex, whereas modifying the known `genres` field does. You can see this by comparing the time spent processing each update.

```json
{
  "uid": 3,
  "indexUid": "movies",
  "status": "succeeded",
  "type": "settingsUpdate",
  "canceledBy": null,
  "details": {
    "filterableAttributes": [
      "genres"
    ]
  },
  "error": null,
  "duration": "PT9.237703S",
  "enqueuedAt": "2024-03-12T15:34:26.836083Z",
  "startedAt": "2024-03-12T15:34:26.836374Z",
  "finishedAt": "2024-03-12T15:34:36.074077Z"
},
{
  "uid": 2,
  "indexUid": "movies",
  "status": "succeeded",
  "type": "settingsUpdate",
  "canceledBy": null,
  "details": {
    "filterableAttributes": [
      "lol"
    ]
  },
  "error": null,
  "duration": "PT0.000751S",
  "enqueuedAt": "2024-03-12T15:33:53.563923Z",
  "startedAt": "2024-03-12T15:33:53.565259Z",
  "finishedAt": "2024-03-12T15:33:53.56601Z"
},
{
  "uid": 0,
  "indexUid": "movies",
  "status": "succeeded",
  "type": "documentAdditionOrUpdate",
  "canceledBy": null,
  "details": {
    "receivedDocuments": 31944,
    "indexedDocuments": 31944
  },
  "error": null,
  "duration": "PT3.120723S",
  "enqueuedAt": "2024-02-17T10:35:55.042864Z",
  "startedAt": "2024-02-17T10:35:55.043505Z",
  "finishedAt": "2024-02-17T10:35:58.164228Z"
}
```

Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-03-13 16:23:32 +00:00
meili-bors[bot]
1d8c13f595 Merge #4487
4487: Update version for the next release (v1.7.1) in Cargo.toml r=Kerollmops a=meili-bot

⚠️ This PR is automatically generated. Check that the new version is the expected one and that Cargo.lock has been updated before merging.

Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>
2024-03-13 15:41:10 +00:00
Kerollmops
7f3c495f5c Update version for the next release (v1.7.1) in Cargo.toml 2024-03-13 14:49:21 +00:00
Clément Renault
ca4876fd10 Do not reindex when modifying unknown faceted field 2024-03-12 16:18:58 +01:00
meili-bors[bot]
ee3076d5ba Merge #4462
4462: Divide threshold by ten r=dureuill a=ManyTheFish

Change the facet incremental vs. bulk indexing threshold to better fit our users' needs; it might be adjusted again in the future if we gather more insights.
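
For context, a hedged sketch of the kind of decision such a threshold governs; the constant name and value are assumptions, not milli's actual code:

```rust
// Illustrative only: the real constant and call sites live in milli's facet
// indexing code; this just shows the shape of the decision the PR retunes.
const FACET_INCREMENTAL_THRESHOLD: u64 = 1_000; // hypothetical value

fn use_incremental_facet_indexing(modified_facet_values: u64) -> bool {
    // Below the threshold, patch the facet trees in place (incremental);
    // above it, rebuilding them in bulk is cheaper.
    modified_facet_values < FACET_INCREMENTAL_THRESHOLD
}
```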


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-03-06 13:05:38 +00:00
meili-bors[bot]
ab1224bfa7 Merge #4458
4458: Replace logging timer by spans r=Kerollmops a=dureuill

- Remove the logging-timer dependency.
- Replace its last uses in search with spans (sketched below).
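
A minimal sketch of the replacement pattern, assuming the standard `tracing` API; the span name is illustrative:

```rust
use tracing::info_span;

fn search() {
    // The subscriber can report the span's duration when `_entered` is
    // dropped at the end of the scope, which is what the timer macro used to do.
    let _entered = info_span!("search").entered();
    // ... perform the search ...
}
```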

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-05 16:43:23 +00:00
meili-bors[bot]
eefc1c421e Merge #4459
4459: Put a bound on OpenAI timeout r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4460 

## What does this PR do?
- Makes sure that the timeout of the OpenAI embedder is capped at 1 minute, rather than the prior 15+ minutes (sketched below)
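
A minimal sketch of the capping logic, with an assumed constant name:

```rust
use std::time::Duration;

// Minimal sketch of bounding a timeout; the 1-minute cap mirrors the PR
// description, the constant and function names are illustrative.
const MAX_EMBEDDER_TIMEOUT: Duration = Duration::from_secs(60);

fn bounded_timeout(requested: Duration) -> Duration {
    requested.min(MAX_EMBEDDER_TIMEOUT)
}
```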



Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-05 15:18:51 +00:00
meili-bors[bot]
4d42a7af7c Merge #4445
4445: Add subcommand to run benchmarks r=irevoire a=dureuill

# Pull Request

## Related issue
Not user-facing, no issue

## What does this PR do?
- Adds a new `cargo xtask bench` subcommand that can run one or multiple workload files and report the results to a server (a rough sketch of the CLI shape follows this list)
- A workload file is a JSON file with a specific schema
- Refactor our use of the `vergen` crate:
  - update to the beta `vergen-git2` crate
  - VERGEN_GIT_SEMVER_LIGHTWEIGHT => VERGEN_GIT_DESCRIBE
  - factor logic in a single `build-info` crate that is used both by meilisearch and xtask (prevents vergen variables from overriding themselves)
  - checked that defining the variables by hand when no git repo is available (docker build case) still works.
- Add CI to run `cargo xtask bench`
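
A rough, hypothetical sketch of what the subcommand's CLI shape could look like with `clap`; only the `--api-key` and `--dashboard-url` flags are taken from the workflows added in this PR, everything else is illustrative:

```rust
use clap::Parser;
use std::path::PathBuf;

// Not the actual xtask CLI: a sketch of `cargo xtask bench -- workloads/movies.json`.
#[derive(Parser)]
struct BenchArgs {
    /// Server to report benchmark results to (illustrative flag).
    #[arg(long)]
    dashboard_url: Option<String>,
    /// API key for the dashboard (illustrative flag).
    #[arg(long)]
    api_key: Option<String>,
    /// One or more workload JSON files to run.
    workloads: Vec<PathBuf>,
}

fn main() {
    let args = BenchArgs::parse();
    for workload in &args.workloads {
        println!("would run workload {}", workload.display());
    }
}
```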

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-03-05 14:03:57 +00:00
Louis Dureuil
7408db2a46 Meilisearch: fix date formatting 2024-03-05 14:56:48 +01:00
Louis Dureuil
663629a9d6 Remove unused build dependency from xtask
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-05 14:45:06 +01:00
Louis Dureuil
15c38dca78 Output RFC 3339 dates where we can
Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-05 14:44:48 +01:00
Louis Dureuil
7ee20b0895 Refactor xtask bench 2024-03-05 14:42:06 +01:00
Louis Dureuil
0c216048b5 Cap timeout duration 2024-03-05 12:19:25 +01:00
Louis Dureuil
36d17110d8 openai: Handle BAD_GATEWAY, be more resilient to failure 2024-03-05 12:18:54 +01:00
meili-bors[bot]
bdd428c22e Merge #4450
4450: Add the content type in the webhook + improve the test r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4436

## What does this PR do?
- Specify the content type of the webhook request
- Ensure it is actually set in the test (a sketch of the idea follows)
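
A hedged sketch of the idea; the `application/x-ndjson` content type and the use of `reqwest` are assumptions, not necessarily what the webhook actually sends:

```rust
// The webhook notification sets an explicit Content-Type and the test
// asserts it; header value and HTTP client here are assumptions.
fn notify_webhook(url: &str, payload: Vec<u8>) -> reqwest::Result<reqwest::blocking::Response> {
    reqwest::blocking::Client::new()
        .post(url)
        .header(reqwest::header::CONTENT_TYPE, "application/x-ndjson")
        .body(payload)
        .send()
}
```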

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-03-05 10:36:53 +00:00
Tamo
b130917933 add the content type in the webhook + improve the test 2024-03-05 11:22:29 +01:00
Louis Dureuil
25f64ce7df Replace logging timer by spans 2024-03-05 11:05:42 +01:00
Louis Dureuil
adcd848809 CI: Add bench workflows 2024-03-05 11:02:05 +01:00
Louis Dureuil
eee46b7537 Add first workloads 2024-03-05 10:13:11 +01:00
Louis Dureuil
55f60a3638 Update .gitignore
- Ignore `/bench` directory for git purposes
- Ignore benchmark DB
2024-03-05 10:12:52 +01:00
Louis Dureuil
c608b3f9b5 Factor vergen stuff to a build-info crate 2024-03-05 10:11:43 +01:00
Louis Dureuil
86ce843f3d Add cargo xtask bench 2024-03-05 10:11:43 +01:00
Louis Dureuil
b11df7ec34 Meilisearch: fix some wrong spans 2024-03-05 10:11:43 +01:00
Louis Dureuil
6862caef64 Span Stats compute self-time 2024-03-05 10:11:43 +01:00
Louis Dureuil
f75c7ac979 Compile xtask in --release 2024-03-05 10:11:43 +01:00
ManyTheFish
eada6de261 Divide threshold by ten 2024-03-04 18:02:54 +01:00
meili-bors[bot]
f4a6261dea Merge #4453
4453: Don't test on nightly r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4441 better 😅 

## What does this PR do?
- No longer run tests on nightly

The motivation for this change is that we are now updating Rust at fixed points in time, and so no longer need nightly runs to ensure that a change won't get into stable and break our build at the worst possible moment.


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-29 14:41:59 +00:00
Louis Dureuil
9806a3e5f6 Don't test on nightly 2024-02-29 14:24:50 +01:00
meili-bors[bot]
a96b45dda7 Merge #4451
4451: Fix nightly build r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes #4441 

## What does this PR do?
- Change imports following https://github.com/rust-lang/rust/pull/117772

## Note

This one is going to be a bit annoying until the lint stabilizes:

- We only get the warning on nightly, so we will discover them when it runs in the CI that uses the nightly compiler (not on regular PRs)
- There's also the case of the `TryInto`/`TryFrom` traits. They were added to the prelude in Rust edition 2021, so `use`ing them triggers a warning on nightly for 2021-edition crates (most crates), while *not* `use`ing them is an error everywhere for 2018-edition crates such as `milli` (illustrated below)
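
A tiny illustration of that edition difference:

```rust
// In a 2021-edition crate this import is redundant (`TryFrom` is already in
// the prelude) and nightly warns about it; in a 2018-edition crate it is required.
use std::convert::TryFrom;

fn to_u8(x: u32) -> Option<u8> {
    u8::try_from(x).ok()
}
```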

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-29 07:20:22 +00:00
Louis Dureuil
452a343a2b Fix imports 2024-02-28 18:09:40 +01:00
meili-bors[bot]
b87485e80d Merge #4433
4433: Enhance facet incremental r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4367
Fixes #4409

## What does this PR do?

- Add a test reproducing #4409
- Fix #4409 by removing a document from a level only if it is no longer present in any of the linked sub-level nodes (a conceptual sketch follows this list)
- Optimize facet incremental indexing by creating or deleting a complete level once per field id instead of once per facet value
- Optimize facet incremental indexing by performing the additions and the deletions in the same pass instead of separately
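
A conceptual sketch of the #4409 fix, using `roaring` bitmaps as stand-ins for milli's facet level nodes:

```rust
use roaring::RoaringBitmap;

// Not milli's actual facet-tree code: a parent level node keeps a document
// only while at least one of its children still references it.
fn remove_from_parent(parent: &mut RoaringBitmap, children: &[RoaringBitmap], doc_id: u32) {
    let still_referenced = children.iter().any(|child| child.contains(doc_id));
    if !still_referenced {
        parent.remove(doc_id);
    }
}
```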


Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-02-28 15:28:46 +00:00
meili-bors[bot]
147a67dc82 Merge #4446
4446: Do not omit vectors when importing a dump r=irevoire a=dureuill

# Pull Request

## Related issue
Fixes #4447 

## What does this PR do?
- Correctly populate the maps of embedders before starting the indexing operations when importing a dump


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-27 09:11:00 +00:00
Louis Dureuil
716ffc07ee Build the embedders when importing a dump 2024-02-26 22:15:57 +01:00
meili-bors[bot]
b005eb3289 Merge #4435
4435: Make update file deletion atomic r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4432
Fixes https://github.com/meilisearch/meilisearch/issues/4438 by adding the logs the user asked for

## What does this PR do?
- Adds a bunch of logs to help debug this kind of issue in the future
- Delete the update files AFTER committing the update in the `index-scheduler` (thus, if a restart happens, we are able to re-process the batch successfully)
- Multi-thread the deletion of all update files (a minimal sketch follows this list).
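
A minimal sketch of the "delete after commit, in parallel" idea, assuming `rayon`; the real code lives in the index-scheduler:

```rust
use rayon::prelude::*;
use std::path::PathBuf;

// Called only once the batch has been committed: if the process restarts
// before this point, the files are still there and the batch can be re-processed.
fn delete_update_files(paths: Vec<PathBuf>) {
    paths.par_iter().for_each(|path| {
        if let Err(e) = std::fs::remove_file(path) {
            eprintln!("can't delete update file {}: {e}", path.display());
        }
    });
}
```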


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-26 17:54:40 +00:00
meili-bors[bot]
9e664d87eb Merge #4443
4443: Add GPU analytics r=dureuill a=dureuill

# Pull Request

## Related issue

Adds analytics indicating whether Meilisearch was compiled with the `milli/cuda` feature (see the sketch below).
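
Illustratively, such a flag boils down to a compile-time feature check; the diff uses milli's own `is_cuda_enabled()` helper rather than this sketch:

```rust
// `cfg!` is the general mechanism for exposing a compile-time feature flag
// like this one to analytics.
fn gpu_enabled() -> bool {
    cfg!(feature = "cuda")
}
```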

Cc `@macraig` 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-02-26 17:13:45 +00:00
meili-bors[bot]
6dcb5219a0 Merge #4442
4442: Send custom task r=ManyTheFish a=irevoire

This PR has already been merged on main but was supposed to be merged on `release-v1.7.0`, so we need to merge it a second time; sorry 😓 

### This PR implements the necessary parameters for the High Availability

Introduce a new CLI flag called `--experimental-replication-parameters` that changes a few behaviors in the engine:
- [The auto-deletion of tasks is disabled](https://specs.meilisearch.com/specifications/text/0060-tasks-api.html#_2-technical-details)
- Upon registering a task, you can choose its task ID by sending a new header: `TaskId: 456645`. It must be a valid number greater than the last task id ever seen.
- Add the ability to « dry-register » a task: Meilisearch answers with a valid task ID as if everything went well, but doesn't actually write anything to the database. To do that, you need to use the `DryRun: true` header. (A client-side sketch of both headers follows this list.)
- Specification’s here: https://github.com/meilisearch/specifications/pull/266
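
A hedged client-side sketch of both headers, using `reqwest` rather than any Meilisearch SDK; the index name and payload are made up:

```rust
// Header names come from the PR text; everything else is illustrative.
fn enqueue_with_fixed_task_id(host: &str, api_key: &str) -> reqwest::Result<reqwest::blocking::Response> {
    reqwest::blocking::Client::new()
        .post(format!("{host}/indexes/movies/documents"))
        .bearer_auth(api_key)
        .header("TaskId", "456645") // pick the task id yourself
        .header("DryRun", "true")   // get a task id back without writing anything
        .json(&serde_json::json!([{ "id": 1, "title": "Carol" }]))
        .send()
}
```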

Co-authored-by: Tamo <tamo@meilisearch.com>
2024-02-26 15:20:16 +00:00
ManyTheFish
5e83bac448 Fix PR comments 2024-02-26 15:40:15 +01:00
Tamo
0562818c2a fix and remove the file-store hack of /dev/null 2024-02-26 13:59:41 +01:00
Tamo
a478392b7a create a test with the dry-run parameter enabled 2024-02-26 13:59:41 +01:00
Tamo
bbf3fb88ca rename the cli parameter 2024-02-26 13:59:40 +01:00
Tamo
60510e037b update the discussion link 2024-02-26 13:58:04 +01:00
Tamo
36c27a18a1 implement the dry run ha parameter 2024-02-26 13:58:04 +01:00
Tamo
1eb1c043b5 disable the auto deletion of tasks when the ha mode is enabled 2024-02-26 13:58:04 +01:00
Tamo
507739bd98 add an experimental cli parameter to allow specifying your task id 2024-02-26 13:58:03 +01:00
Tamo
eb25b07390 let you specify your task id 2024-02-26 13:56:31 +01:00
Tamo
066a7a3cde takes only one read transaction per thread 2024-02-26 10:43:04 +01:00
Louis Dureuil
55796406c5 Add GPU analytics 2024-02-26 10:41:47 +01:00
Tamo
91cdd502f8 When processing tasks, make the update file deletion atomic 2024-02-22 14:56:22 +01:00
ManyTheFish
a493a50825 Fix clippy 2024-02-22 14:53:33 +01:00
ManyTheFish
9d1f489a37 Fix facet incremental indexing 2024-02-21 18:42:16 +01:00
ManyTheFish
865b415b3f Add test reproducing bug 2024-02-15 16:00:48 +01:00
79 changed files with 4415 additions and 990 deletions

View File

@@ -1,2 +1,2 @@
[alias]
xtask = "run --package xtask --"
xtask = "run --release --package xtask --"

30
.github/workflows/bench-manual.yml vendored Normal file
View File

@@ -0,0 +1,30 @@
name: Bench (manual)
on:
workflow_dispatch:
inputs:
workload:
description: 'The path to the workloads to execute (workloads/...)'
required: true
default: 'workloads/movies.json'
env:
WORKLOAD_NAME: ${{ github.event.inputs.workload }}
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks - workload ${WORKLOAD_NAME} - branch ${{ github.ref }} - commit ${{ github.sha }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Manual [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- ${WORKLOAD_NAME}

46
.github/workflows/bench-pr.yml vendored Normal file
View File

@@ -0,0 +1,46 @@
name: Bench (PR)
on:
issue_comment:
types: [created]
permissions:
issues: write
env:
GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
jobs:
run-benchmarks-on-comment:
if: startsWith(github.event.comment.body, '/bench')
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- name: Check for Command
id: command
uses: xt0rted/slash-command-action@v2
with:
command: bench
reaction-type: "rocket"
repo-token: ${{ env.GH_TOKEN }}
- uses: xt0rted/pull-request-comment-branch@v2
id: comment-branch
with:
repo_token: ${{ env.GH_TOKEN }}
- uses: actions/checkout@v3
if: success()
with:
fetch-depth: 0 # fetch full history to be able to get main commit sha
ref: ${{ steps.comment-branch.outputs.head_ref }}
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- name: Run benchmarks on PR ${{ github.event.issue.id }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "[Comment](${{ github.event.comment.url }}) on [#${{github.event.issue.id}}](${{ github.event.issue.url }})" -- ${{ steps.command.outputs.command-arguments }}

View File

@@ -0,0 +1,25 @@
name: Indexing bench (push)
on:
push:
branches:
- main
jobs:
benchmarks:
name: Run and upload benchmarks
runs-on: benchmarks
timeout-minutes: 180 # 3h
steps:
- uses: actions/checkout@v3
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
# Run benchmarks
- name: Run benchmarks - Dataset ${BENCH_NAME} - Branch main - Commit ${{ github.sha }}
run: |
cargo xtask bench --api-key "${{ secrets.BENCHMARK_API_KEY }}" --dashboard-url "${{ vars.BENCHMARK_DASHBOARD_URL }}" --reason "Push on `main` [Run #${{ github.run_id }}](https://github.com/meilisearch/meilisearch/actions/runs/${{ github.run_id }})" -- workloads/*.json

View File

@@ -31,17 +31,10 @@ jobs:
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Setup test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Setup test with Rust nightly
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Cache dependencies
uses: Swatinem/rust-cache@v2.7.1
- name: Run cargo check without any default features

2
.gitignore vendored
View File

@@ -9,6 +9,8 @@
/data.ms
/snapshots
/dumps
/bench
/_xtask_benchmark.ms
# Snapshots
## ... large

268
Cargo.lock generated
View File

@@ -356,9 +356,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.79"
version = "1.0.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca"
checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1"
dependencies = [
"backtrace",
]
@@ -440,6 +440,12 @@ dependencies = [
"syn 2.0.48",
]
[[package]]
name = "atomic"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba"
[[package]]
name = "atomic-polyfill"
version = "0.1.11"
@@ -490,7 +496,7 @@ checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
[[package]]
name = "benchmarks"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"anyhow",
"bytes",
@@ -622,6 +628,15 @@ dependencies = [
"serde",
]
[[package]]
name = "build-info"
version = "1.7.1"
dependencies = [
"anyhow",
"time",
"vergen-git2",
]
[[package]]
name = "bumpalo"
version = "3.13.0"
@@ -1342,7 +1357,16 @@ version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8"
dependencies = [
"derive_builder_macro",
"derive_builder_macro 0.12.0",
]
[[package]]
name = "derive_builder"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f59169f400d8087f238c5c0c7db6a28af18681717f3b623227d92f397e938c7"
dependencies = [
"derive_builder_macro 0.13.1",
]
[[package]]
@@ -1357,13 +1381,35 @@ dependencies = [
"syn 1.0.109",
]
[[package]]
name = "derive_builder_core"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4ec317cc3e7ef0928b0ca6e4a634a4d6c001672ae210438cf114a83e56b018d"
dependencies = [
"darling 0.14.4",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "derive_builder_macro"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e"
dependencies = [
"derive_builder_core",
"derive_builder_core 0.12.0",
"syn 1.0.109",
]
[[package]]
name = "derive_builder_macro"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "870368c3fb35b8031abb378861d4460f573b92238ec2152c927a21f77e3e0127"
dependencies = [
"derive_builder_core 0.13.1",
"syn 1.0.109",
]
@@ -1485,7 +1531,7 @@ dependencies = [
[[package]]
name = "dump"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"anyhow",
"big_s",
@@ -1723,11 +1769,12 @@ dependencies = [
[[package]]
name = "file-store"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"faux",
"tempfile",
"thiserror",
"tracing",
"uuid",
]
@@ -1745,7 +1792,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"insta",
"nom",
@@ -1765,7 +1812,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"criterion",
"serde_json",
@@ -1883,7 +1930,7 @@ dependencies = [
[[package]]
name = "fuzzers"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"arbitrary",
"clap",
@@ -2081,11 +2128,11 @@ checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e"
[[package]]
name = "git2"
version = "0.16.1"
version = "0.18.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccf7f68c2995f392c49fffb4f95ae2c873297830eb25c6bc4c114ce8f4562acc"
checksum = "1b3ba52851e73b46a4c3df1d89343741112003f0f6f13beb0dfac9e457c3fdcd"
dependencies = [
"bitflags 1.3.2",
"bitflags 2.4.1",
"libc",
"libgit2-sys",
"log",
@@ -2101,8 +2148,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "grenad"
version = "0.4.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a007932af5475ebb5c63bef8812bb1c36f317983bb4ca663e9d6dd58d6a0f8c"
source = "git+https://github.com/meilisearch/grenad.git?branch=keep-source-index-in-merger#5a7c10fcd689f5967a8979f6b66da1e0939439ff"
dependencies = [
"bytemuck",
"byteorder",
@@ -2375,14 +2421,14 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"
[[package]]
name = "index-scheduler"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"anyhow",
"big_s",
"bincode",
"crossbeam",
"csv",
"derive_builder",
"derive_builder 0.12.0",
"dump",
"enum-iterator",
"file-store",
@@ -2393,6 +2439,7 @@ dependencies = [
"meilisearch-types",
"page_size 0.5.0",
"puffin",
"rayon",
"roaring",
"serde",
"serde_json",
@@ -2498,7 +2545,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bad00257d07be169d870ab665980b06cdb366d792ad690bf2e76876dc503455"
dependencies = [
"hermit-abi",
"rustix 0.38.26",
"rustix 0.38.31",
"windows-sys 0.52.0",
]
@@ -2561,7 +2608,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"criterion",
"serde_json",
@@ -2620,15 +2667,15 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.150"
version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "libgit2-sys"
version = "0.14.2+1.5.1"
version = "0.16.2+1.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f3d95f6b51075fe9810a7ae22c7095f12b98005ab364d8544797a825ce946a4"
checksum = "ee4126d8b4ee5c9d9ea891dd875cfdc1e9d0950437179104b183d7d8a74d24e8"
dependencies = [
"cc",
"libc",
@@ -2675,9 +2722,9 @@ dependencies = [
[[package]]
name = "libz-sys"
version = "1.1.12"
version = "1.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d97137b25e321a73eef1418d1d5d2eda4d77e12813f8e6dead84bc52c5870a7b"
checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6"
dependencies = [
"cc",
"libc",
@@ -3021,28 +3068,6 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "logging_timer"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64e96f261d684b7089aa576bb74e823241dccd994b27d30fabf1dcb3af284fe9"
dependencies = [
"log",
"logging_timer_proc_macros",
]
[[package]]
name = "logging_timer_proc_macros"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "10a9062912d7952c5588cc474795e0b9ee008e7e6781127945b85413d4b99d81"
dependencies = [
"log",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "lz4_flex"
version = "0.10.0"
@@ -3091,7 +3116,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"
[[package]]
name = "meili-snap"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"insta",
"md5",
@@ -3100,7 +3125,7 @@ dependencies = [
[[package]]
name = "meilisearch"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"actix-cors",
"actix-http",
@@ -3114,6 +3139,7 @@ dependencies = [
"async-trait",
"brotli",
"bstr",
"build-info",
"byte-unit",
"bytes",
"cargo_toml",
@@ -3185,7 +3211,6 @@ dependencies = [
"url",
"urlencoding",
"uuid",
"vergen",
"walkdir",
"yaup",
"zip",
@@ -3193,7 +3218,7 @@ dependencies = [
[[package]]
name = "meilisearch-auth"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"base64 0.21.7",
"enum-iterator",
@@ -3212,7 +3237,7 @@ dependencies = [
[[package]]
name = "meilisearch-types"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"actix-web",
"anyhow",
@@ -3242,7 +3267,7 @@ dependencies = [
[[package]]
name = "meilitool"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"anyhow",
"clap",
@@ -3281,7 +3306,7 @@ dependencies = [
[[package]]
name = "milli"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"arroy",
"big_s",
@@ -3314,7 +3339,6 @@ dependencies = [
"json-depth-checker",
"levenshtein_automata",
"liquid",
"logging_timer",
"maplit",
"md5",
"meili-snap",
@@ -3486,6 +3510,12 @@ dependencies = [
"num-traits",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "num-integer"
version = "0.1.45"
@@ -3516,6 +3546,15 @@ dependencies = [
"libc",
]
[[package]]
name = "num_threads"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c7398b9c8b70908f6371f47ed36737907c87c52af34c268fed0bf0ceb92ead9"
dependencies = [
"libc",
]
[[package]]
name = "number_prefix"
version = "0.4.0"
@@ -3708,7 +3747,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "permissive-json-pointer"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"big_s",
"serde_json",
@@ -4077,9 +4116,9 @@ dependencies = [
[[package]]
name = "rayon"
version = "1.8.0"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c27db03db7734835b3f53954b534c91069375ce6ccaa2e065441e07d9b6cdb1"
checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051"
dependencies = [
"either",
"rayon-core",
@@ -4098,9 +4137,9 @@ dependencies = [
[[package]]
name = "rayon-core"
version = "1.12.0"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5ce3fb6ad83f861aac485e76e1985cd109d9a3713802152be56c3b1f0e0658ed"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
@@ -4130,15 +4169,6 @@ dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_syscall"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa"
dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "redox_users"
version = "0.4.3"
@@ -4216,10 +4246,12 @@ dependencies = [
"system-configuration",
"tokio",
"tokio-rustls 0.24.1",
"tokio-util",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"webpki-roots 0.25.3",
"winreg",
@@ -4327,9 +4359,9 @@ dependencies = [
[[package]]
name = "rustix"
version = "0.38.26"
version = "0.38.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9470c4bf8246c8daf25f9598dca807fb6510347b1e1cfa55749113850c79d88a"
checksum = "6ea3e1a662af26cd7a3ba09c0297a31af215563ecf42817c98df621387f4e949"
dependencies = [
"bitflags 2.4.1",
"errno",
@@ -4865,14 +4897,13 @@ dependencies = [
[[package]]
name = "tempfile"
version = "3.9.0"
version = "3.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa"
checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1"
dependencies = [
"cfg-if",
"fastrand",
"redox_syscall 0.4.1",
"rustix 0.38.26",
"rustix 0.38.31",
"windows-sys 0.52.0",
]
@@ -4932,12 +4963,15 @@ dependencies = [
[[package]]
name = "time"
version = "0.3.31"
version = "0.3.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f657ba42c3f86e7680e53c8cd3af8abbe56b5491790b46e22e19c0d57463583e"
checksum = "c8248b6521bb14bc45b4067159b9b6ad792e2d6d754d6c41fb50e29fefe38749"
dependencies = [
"deranged",
"itoa",
"libc",
"num-conv",
"num_threads",
"powerfmt",
"serde",
"time-core",
@@ -4952,10 +4986,11 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "time-macros"
version = "0.2.16"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26197e33420244aeb70c3e8c78376ca46571bc4e701e4791c2cd9f57dcb3a43f"
checksum = "7ba3a3ef41e6672a2f0f001392bb5dcd3ff0a9992d618ca761a11c3121547774"
dependencies = [
"num-conv",
"time-core",
]
@@ -4990,7 +5025,7 @@ version = "0.14.1"
source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
dependencies = [
"aho-corasick",
"derive_builder",
"derive_builder 0.12.0",
"esaxx-rs",
"getrandom",
"itertools 0.11.0",
@@ -5393,10 +5428,11 @@ checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "uuid"
version = "1.6.1"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560"
checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a"
dependencies = [
"atomic",
"getrandom",
"serde",
]
@@ -5415,18 +5451,42 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
[[package]]
name = "vergen"
version = "7.5.1"
version = "9.0.0-beta.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f21b881cd6636ece9735721cf03c1fe1e774fe258683d084bb2812ab67435749"
checksum = "107dc53b443fe8cc380798abb75ad6b7038281165109afea1f1b28bb47047ed5"
dependencies = [
"anyhow",
"cfg-if",
"enum-iterator",
"derive_builder 0.13.1",
"getset",
"rustversion",
"vergen-lib",
]
[[package]]
name = "vergen-git2"
version = "1.0.0-beta.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8875c5d71074bb67118774e3d795ab6fe77c3ae3161cb54e19104cabc49487f1"
dependencies = [
"anyhow",
"derive_builder 0.13.1",
"git2",
"rustversion",
"thiserror",
"time",
"vergen",
"vergen-lib",
]
[[package]]
name = "vergen-lib"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26ebfba72ba904559f25f41ea1512335b5a46459084258cea0857549d9645187"
dependencies = [
"anyhow",
"derive_builder 0.13.1",
"getset",
"rustversion",
]
[[package]]
@@ -5537,6 +5597,19 @@ version = "0.2.87"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1"
[[package]]
name = "wasm-streams"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4609d447824375f43e1ffbc051b50ad8f4b3ae8219680c94452ea05eb240ac7"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "wav"
version = "1.0.0"
@@ -5841,9 +5914,9 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
[[package]]
name = "winnow"
version = "0.5.4"
version = "0.5.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acaaa1190073b2b101e15083c38ee8ec891b5e05cbee516521e94ec008f61e64"
checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
dependencies = [
"memchr",
]
@@ -5869,10 +5942,25 @@ dependencies = [
[[package]]
name = "xtask"
version = "1.7.0"
version = "1.7.1"
dependencies = [
"anyhow",
"build-info",
"cargo_metadata",
"clap",
"futures-core",
"futures-util",
"reqwest",
"serde",
"serde_json",
"sha2",
"sysinfo",
"time",
"tokio",
"tracing",
"tracing-subscriber",
"tracing-trace",
"uuid",
]
[[package]]

View File

@@ -17,11 +17,11 @@ members = [
"benchmarks",
"fuzzers",
"tracing-trace",
"xtask",
"xtask", "build-info",
]
[workspace.package]
version = "1.7.0"
version = "1.7.1"
authors = [
"Quentin de Quelen <quentin@dequelen.me>",
"Clément Renault <clement@meilisearch.com>",

View File

@@ -8,7 +8,7 @@ WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_DESCRIBE=${GIT_TAG}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .

18
build-info/Cargo.toml Normal file
View File

@@ -0,0 +1,18 @@
[package]
name = "build-info"
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
time = { version = "0.3.34", features = ["parsing"] }
[build-dependencies]
anyhow = "1.0.80"
vergen-git2 = "1.0.0-beta.2"

22
build-info/build.rs Normal file
View File

@@ -0,0 +1,22 @@
fn main() {
if let Err(err) = emit_git_variables() {
println!("cargo:warning=vergen: {}", err);
}
}
fn emit_git_variables() -> anyhow::Result<()> {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
let mut builder = vergen_git2::Git2Builder::default();
builder.branch(true);
builder.commit_timestamp(true);
builder.commit_message(true);
builder.describe(true, true, None);
builder.sha(false);
let git2 = builder.build()?;
vergen_git2::Emitter::default().fail_on_error().add_instructions(&git2)?.emit()
}

203
build-info/src/lib.rs Normal file
View File

@@ -0,0 +1,203 @@
use time::format_description::well_known::Iso8601;
#[derive(Debug, Clone)]
pub struct BuildInfo {
pub branch: Option<&'static str>,
pub describe: Option<DescribeResult>,
pub commit_sha1: Option<&'static str>,
pub commit_msg: Option<&'static str>,
pub commit_timestamp: Option<time::OffsetDateTime>,
}
impl BuildInfo {
pub fn from_build() -> Self {
let branch: Option<&'static str> = option_env!("VERGEN_GIT_BRANCH");
let describe = DescribeResult::from_build();
let commit_sha1 = option_env!("VERGEN_GIT_SHA");
let commit_msg = option_env!("VERGEN_GIT_COMMIT_MESSAGE");
let commit_timestamp = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP");
let commit_timestamp = commit_timestamp.and_then(|commit_timestamp| {
time::OffsetDateTime::parse(commit_timestamp, &Iso8601::DEFAULT).ok()
});
Self { branch, describe, commit_sha1, commit_msg, commit_timestamp }
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DescribeResult {
Prototype { name: &'static str },
Release { version: &'static str, major: u64, minor: u64, patch: u64 },
Prerelease { version: &'static str, major: u64, minor: u64, patch: u64, rc: u64 },
NotATag { describe: &'static str },
}
impl DescribeResult {
pub fn new(describe: &'static str) -> Self {
if let Some(name) = prototype_name(describe) {
Self::Prototype { name }
} else if let Some(release) = release_version(describe) {
release
} else if let Some(prerelease) = prerelease_version(describe) {
prerelease
} else {
Self::NotATag { describe }
}
}
pub fn from_build() -> Option<Self> {
let describe: &'static str = option_env!("VERGEN_GIT_DESCRIBE")?;
Some(Self::new(describe))
}
pub fn as_tag(&self) -> Option<&'static str> {
match self {
DescribeResult::Prototype { name } => Some(name),
DescribeResult::Release { version, .. } => Some(version),
DescribeResult::Prerelease { version, .. } => Some(version),
DescribeResult::NotATag { describe: _ } => None,
}
}
pub fn as_prototype(&self) -> Option<&'static str> {
match self {
DescribeResult::Prototype { name } => Some(name),
DescribeResult::Release { .. }
| DescribeResult::Prerelease { .. }
| DescribeResult::NotATag { .. } => None,
}
}
}
/// Parses the input as a prototype name.
///
/// Returns `Some(prototype_name)` if the following conditions are met on this value:
///
/// 1. starts with `prototype-`,
/// 2. ends with `-<some_number>`,
/// 3. does not end with `<some_number>-<some_number>`.
///
/// Otherwise, returns `None`.
fn prototype_name(describe: &'static str) -> Option<&'static str> {
if !describe.starts_with("prototype-") {
return None;
}
let mut rsplit_prototype = describe.rsplit('-');
// last component MUST be a number
rsplit_prototype.next()?.parse::<u64>().ok()?;
// the component before the last SHALL NOT be a number
rsplit_prototype.next()?.parse::<u64>().err()?;
Some(describe)
}
fn release_version(describe: &'static str) -> Option<DescribeResult> {
if !describe.starts_with('v') {
return None;
}
// a full release version doesn't contain a `-`
if describe.contains('-') {
return None;
}
// full release version parse as vX.Y.Z, with X, Y, Z numbers.
let mut dots = describe[1..].split('.');
let major: u64 = dots.next()?.parse().ok()?;
let minor: u64 = dots.next()?.parse().ok()?;
let patch: u64 = dots.next()?.parse().ok()?;
if dots.next().is_some() {
return None;
}
Some(DescribeResult::Release { version: describe, major, minor, patch })
}
fn prerelease_version(describe: &'static str) -> Option<DescribeResult> {
// prerelease version is in the shape vM.N.P-rc.C
let mut hyphen = describe.rsplit('-');
let prerelease = hyphen.next()?;
if !prerelease.starts_with("rc.") {
return None;
}
let rc: u64 = prerelease[3..].parse().ok()?;
let release = hyphen.next()?;
let DescribeResult::Release { version: _, major, minor, patch } = release_version(release)?
else {
return None;
};
Some(DescribeResult::Prerelease { version: describe, major, minor, patch, rc })
}
#[cfg(test)]
mod test {
use super::DescribeResult;
fn assert_not_a_tag(describe: &'static str) {
assert_eq!(DescribeResult::NotATag { describe }, DescribeResult::new(describe))
}
fn assert_proto(describe: &'static str) {
assert_eq!(DescribeResult::Prototype { name: describe }, DescribeResult::new(describe))
}
fn assert_release(describe: &'static str, major: u64, minor: u64, patch: u64) {
assert_eq!(
DescribeResult::Release { version: describe, major, minor, patch },
DescribeResult::new(describe)
)
}
fn assert_prerelease(describe: &'static str, major: u64, minor: u64, patch: u64, rc: u64) {
assert_eq!(
DescribeResult::Prerelease { version: describe, major, minor, patch, rc },
DescribeResult::new(describe)
)
}
#[test]
fn not_a_tag() {
assert_not_a_tag("whatever-fuzzy");
assert_not_a_tag("whatever-fuzzy-5-ggg-dirty");
assert_not_a_tag("whatever-fuzzy-120-ggg-dirty");
// technically a tag, but not a proto nor a version, so not parsed as a tag
assert_not_a_tag("whatever");
// dirty version
assert_not_a_tag("v1.7.0-1-ggga-dirty");
assert_not_a_tag("v1.7.0-rc.1-1-ggga-dirty");
// after version
assert_not_a_tag("v1.7.0-1-ggga");
assert_not_a_tag("v1.7.0-rc.1-1-ggga");
// after proto
assert_not_a_tag("protoype-tag-0-1-ggga");
assert_not_a_tag("protoype-tag-0-1-ggga-dirty");
}
#[test]
fn prototype() {
assert_proto("prototype-tag-0");
assert_proto("prototype-tag-10");
assert_proto("prototype-long-name-tag-10");
}
#[test]
fn release() {
assert_release("v1.7.2", 1, 7, 2);
}
#[test]
fn prerelease() {
assert_prerelease("v1.7.2-rc.3", 1, 7, 2, 3);
}
}

View File

@@ -1,4 +1,3 @@
use std::convert::TryInto;
use std::str::FromStr;
use time::OffsetDateTime;

View File

@@ -13,6 +13,7 @@ license.workspace = true
[dependencies]
tempfile = "3.9.0"
thiserror = "1.0.56"
tracing = "0.1.40"
uuid = { version = "1.6.1", features = ["serde", "v4"] }
[dev-dependencies]

View File

@@ -1,5 +1,5 @@
use std::fs::File as StdFile;
use std::ops::{Deref, DerefMut};
use std::io::Write;
use std::path::{Path, PathBuf};
use std::str::FromStr;
@@ -22,20 +22,6 @@ pub enum Error {
pub type Result<T> = std::result::Result<T, Error>;
impl Deref for File {
type Target = NamedTempFile;
fn deref(&self) -> &Self::Target {
&self.file
}
}
impl DerefMut for File {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.file
}
}
#[derive(Clone, Debug)]
pub struct FileStore {
path: PathBuf,
@@ -56,7 +42,7 @@ impl FileStore {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string());
let update_file = File { file, path };
let update_file = File { file: Some(file), path };
Ok((uuid, update_file))
}
@@ -67,7 +53,7 @@ impl FileStore {
let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::from_u128(uuid);
let path = self.path.join(uuid.to_string());
let update_file = File { file, path };
let update_file = File { file: Some(file), path };
Ok((uuid, update_file))
}
@@ -75,7 +61,13 @@ impl FileStore {
/// Returns the file corresponding to the requested uuid.
pub fn get_update(&self, uuid: Uuid) -> Result<StdFile> {
let path = self.get_update_path(uuid);
let file = StdFile::open(path)?;
let file = match StdFile::open(path) {
Ok(file) => file,
Err(e) => {
tracing::error!("Can't access update file {uuid}: {e}");
return Err(e.into());
}
};
Ok(file)
}
@@ -110,8 +102,12 @@ impl FileStore {
pub fn delete(&self, uuid: Uuid) -> Result<()> {
let path = self.path.join(uuid.to_string());
std::fs::remove_file(path)?;
Ok(())
if let Err(e) = std::fs::remove_file(path) {
tracing::error!("Can't delete file {uuid}: {e}");
Err(e.into())
} else {
Ok(())
}
}
/// List the Uuids of the files in the FileStore
@@ -136,16 +132,40 @@ impl FileStore {
pub struct File {
path: PathBuf,
file: NamedTempFile,
file: Option<NamedTempFile>,
}
impl File {
pub fn dry_file() -> Result<Self> {
Ok(Self { path: PathBuf::new(), file: None })
}
pub fn persist(self) -> Result<()> {
self.file.persist(&self.path)?;
if let Some(file) = self.file {
file.persist(&self.path)?;
}
Ok(())
}
}
impl Write for File {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
if let Some(file) = self.file.as_mut() {
file.write(buf)
} else {
Ok(buf.len())
}
}
fn flush(&mut self) -> std::io::Result<()> {
if let Some(file) = self.file.as_mut() {
file.flush()
} else {
Ok(())
}
}
}
#[cfg(test)]
mod test {
use std::io::Write;

View File

@@ -23,6 +23,7 @@ meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = { version = "0.16.0", features = ["serialization"] }
rayon = "1.8.1"
roaring = { version = "0.10.2", features = ["serde"] }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = { version = "1.0.111", features = ["preserve_order"] }

View File

@@ -142,22 +142,28 @@ pub(crate) enum IndexOperation {
impl Batch {
/// Return the task ids associated with this batch.
pub fn ids(&self) -> Vec<TaskId> {
pub fn ids(&self) -> RoaringBitmap {
match self {
Batch::TaskCancelation { task, .. }
| Batch::Dump(task)
| Batch::IndexCreation { task, .. }
| Batch::IndexUpdate { task, .. } => vec![task.uid],
| Batch::IndexUpdate { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
Batch::SnapshotCreation(tasks)
| Batch::TaskDeletions(tasks)
| Batch::IndexDeletion { tasks, .. } => tasks.iter().map(|task| task.uid).collect(),
| Batch::IndexDeletion { tasks, .. } => {
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
RoaringBitmap::from_iter(tasks.iter().map(|task| task.uid))
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
@@ -167,9 +173,11 @@ impl Batch {
cleared_tasks: tasks,
settings_tasks: other,
..
} => tasks.iter().chain(other).map(|task| task.uid).collect(),
} => RoaringBitmap::from_iter(tasks.iter().chain(other).map(|task| task.uid)),
},
Batch::IndexSwap { task } => vec![task.uid],
Batch::IndexSwap { task } => {
RoaringBitmap::from_sorted_iter(std::iter::once(task.uid)).unwrap()
}
}
}
@@ -953,7 +961,22 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
let mut tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
if index.is_corrupted(&index_wtxn)? {
tracing::error!("Aborting task due to corrupted index");
index_wtxn.abort();
for task in tasks.iter_mut() {
task.status = Status::Failed;
task.error = Some(Error::CorruptedIndex.into());
}
return Ok(tasks);
}
index.check_document_facet_consistency(&index_wtxn)?.check();
index_wtxn.commit()?;
// if the update processed successfully, we're going to store the new
@@ -1331,6 +1354,7 @@ impl IndexScheduler {
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,

View File

@@ -48,6 +48,8 @@ impl From<DateField> for Code {
pub enum Error {
#[error("{1}")]
WithCustomErrorCode(Code, Box<Self>),
#[error("Received bad task id: {received} should be >= to {expected}.")]
BadTaskId { received: TaskId, expected: TaskId },
#[error("Index `{0}` not found.")]
IndexNotFound(String),
#[error("Index `{0}` already exists.")]
@@ -136,6 +138,8 @@ pub enum Error {
CreateBatch(Box<Self>),
#[error("Corrupted task queue.")]
CorruptedTaskQueue,
#[error("Corrupted index.")]
CorruptedIndex,
#[error(transparent)]
TaskDatabaseUpdate(Box<Self>),
#[error(transparent)]
@@ -161,6 +165,7 @@ impl Error {
match self {
Error::IndexNotFound(_)
| Error::WithCustomErrorCode(_, _)
| Error::BadTaskId { .. }
| Error::IndexAlreadyExists(_)
| Error::SwapDuplicateIndexFound(_)
| Error::SwapDuplicateIndexesFound(_)
@@ -189,6 +194,7 @@ impl Error {
| Error::Anyhow(_) => true,
Error::CreateBatch(_)
| Error::CorruptedTaskQueue
| Error::CorruptedIndex
| Error::TaskDatabaseUpdate(_)
| Error::HeedTransaction(_) => false,
#[cfg(test)]
@@ -205,6 +211,7 @@ impl ErrorCode for Error {
fn error_code(&self) -> Code {
match self {
Error::WithCustomErrorCode(code, _) => *code,
Error::BadTaskId { .. } => Code::BadRequest,
Error::IndexNotFound(_) => Code::IndexNotFound,
Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
@@ -238,6 +245,7 @@ impl ErrorCode for Error {
Error::CorruptedDump => Code::Internal,
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
Error::CorruptedIndex => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,

View File

@@ -15,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
let IndexScheduler {
autobatching_enabled,
cleanup_enabled: _,
must_stop_processing: _,
processing_tasks,
file_store,

File diff suppressed because it is too large

View File

@@ -0,0 +1,90 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@@ -0,0 +1,90 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"uid": 0,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "succeeded",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 1,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": {
"message": "Index `doggo` already exists.",
"code": "index_already_exists",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_already_exists"
},
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "failed",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 2,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
},
{
"uid": 3,
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]",
"error": null,
"canceledBy": null,
"details": {
"IndexInfo": {
"primary_key": null
}
},
"status": "enqueued",
"kind": {
"indexCreation": {
"index_uid": "doggo",
"primary_key": null
}
}
}
]

View File

@@ -1,5 +1,4 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid;

View File

@@ -1,7 +1,6 @@
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::HashSet;
use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::path::Path;
use std::result::Result as StdResult;

View File

@@ -1,6 +1,6 @@
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, Seek, Write};
use std::io::{self, BufWriter, Write};
use std::marker::PhantomData;
use memmap2::MmapOptions;
@@ -104,8 +104,8 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
pub fn read_csv(file: &File, writer: impl Write, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
@@ -116,9 +116,9 @@ pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result
Ok(count as u64)
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_json(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
let mut deserializer = serde_json::Deserializer::from_slice(&mmap);
@@ -151,8 +151,8 @@ pub fn read_json(file: &File, writer: impl Write + Seek) -> Result<u64> {
}
/// Reads JSON from temporary file and write an obkv batch to writer.
pub fn read_ndjson(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
pub fn read_ndjson(file: &File, writer: impl Write) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(BufWriter::new(writer));
let mmap = unsafe { MmapOptions::new().map(file)? };
for result in serde_json::Deserializer::from_slice(&mmap).into_iter() {

View File

@@ -107,6 +107,7 @@ tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["json"] }
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
tracing-actix-web = "0.7.9"
build-info = { version = "1.7.0", path = "../build-info" }
[dev-dependencies]
actix-rt = "2.9.0"
@@ -131,7 +132,6 @@ reqwest = { version = "0.11.23", features = [
sha-1 = { version = "0.10.1", optional = true }
static-files = { version = "0.2.3", optional = true }
tempfile = { version = "3.9.0", optional = true }
vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.6", optional = true }
[features]

View File

@@ -1,17 +1,4 @@
use vergen::{vergen, Config, SemverKind};
fn main() {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
let mut config = Config::default();
// allow using non-annotated tags
*config.git_mut().semver_kind_mut() = SemverKind::Lightweight;
if let Err(e) = vergen(config) {
println!("cargo:warning=vergen: {}", e);
}
#[cfg(feature = "mini-dashboard")]
mini_dashboard::setup_mini_dashboard().expect("Could not load the mini-dashboard assets");
}

View File

@@ -253,9 +253,11 @@ struct Infos {
env: String,
experimental_enable_metrics: bool,
experimental_logs_mode: LogMode,
experimental_replication_parameters: bool,
experimental_enable_logs_route: bool,
experimental_reduce_indexing_memory_usage: bool,
experimental_max_number_of_batched_tasks: usize,
gpu_enabled: bool,
db_path: bool,
import_dump: bool,
dump_dir: bool,
@@ -292,6 +294,7 @@ impl From<Opt> for Infos {
db_path,
experimental_enable_metrics,
experimental_logs_mode,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
experimental_max_number_of_batched_tasks,
@@ -340,8 +343,10 @@ impl From<Opt> for Infos {
env,
experimental_enable_metrics,
experimental_logs_mode,
experimental_replication_parameters,
experimental_enable_logs_route,
experimental_reduce_indexing_memory_usage,
gpu_enabled: meilisearch_types::milli::vector::is_cuda_enabled(),
db_path: db_path != PathBuf::from("./data.ms"),
import_dump: import_dump.is_some(),
dump_dir: dump_dir != PathBuf::from("dumps/"),
@@ -468,7 +473,9 @@ impl Segment {
create_all_stats(index_scheduler.into(), auth_controller.into(), &AuthFilter::default())
{
// Replace the version number with the prototype name if any.
let version = if let Some(prototype) = crate::prototype_name() {
let version = if let Some(prototype) = build_info::DescribeResult::from_build()
.and_then(|describe| describe.as_prototype())
{
prototype
} else {
env!("CARGO_PKG_VERSION")

View File

@@ -131,6 +131,7 @@ gen_seq! { SeqFromRequestFut3; A B C }
gen_seq! { SeqFromRequestFut4; A B C D }
gen_seq! { SeqFromRequestFut5; A B C D E }
gen_seq! { SeqFromRequestFut6; A B C D E F }
gen_seq! { SeqFromRequestFut7; A B C D E F G }
pin_project! {
#[project = ExtractProj]

View File

@@ -265,7 +265,9 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
.name(String::from("register-snapshot-tasks"))
.spawn(move || loop {
thread::sleep(snapshot_delay);
if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
if let Err(e) =
index_scheduler.register(KindWithContent::SnapshotCreation, None, false)
{
error!("Error while registering snapshot: {}", e);
}
})
@@ -300,6 +302,7 @@ fn open_or_create_database_unchecked(
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
cleanup_enabled: !opt.experimental_replication_parameters,
max_number_of_tasks: 1_000_000,
max_number_of_batched_tasks: opt.experimental_max_number_of_batched_tasks,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
@@ -423,6 +426,9 @@ fn import_dump(
let reader = BufReader::new(file);
let reader = DocumentsBatchReader::from_reader(reader)?;
let embedder_configs = index.embedding_configs(&wtxn)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let builder = milli::update::IndexDocuments::new(
&mut wtxn,
&index,
@@ -435,6 +441,8 @@ fn import_dump(
|| false,
)?;
let builder = builder.with_embedders(embedders);
let (builder, user_result) = builder.add_documents(reader)?;
let user_result = user_result?;
tracing::info!(documents_found = user_result, "{} documents found.", user_result);
@@ -468,6 +476,7 @@ pub fn configure_data(
.app_data(web::Data::from(analytics))
.app_data(web::Data::new(logs_route))
.app_data(web::Data::new(logs_stderr))
.app_data(web::Data::new(opt.clone()))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)
@@ -527,30 +536,3 @@ pub fn dashboard(config: &mut web::ServiceConfig, enable_frontend: bool) {
pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) {
config.service(web::resource("/").route(web::get().to(routes::running)));
}
/// Parses the output of
/// [`VERGEN_GIT_SEMVER_LIGHTWEIGHT`](https://docs.rs/vergen/latest/vergen/struct.Git.html#instructions)
/// as a prototype name.
///
/// Returns `Some(prototype_name)` if the following conditions are met on this value:
///
/// 1. starts with `prototype-`,
/// 2. ends with `-<some_number>`,
/// 3. does not end with `<some_number>-<some_number>`.
///
/// Otherwise, returns `None`.
pub fn prototype_name() -> Option<&'static str> {
let prototype: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;
if !prototype.starts_with("prototype-") {
return None;
}
let mut rsplit_prototype = prototype.rsplit('-');
// last component MUST be a number
rsplit_prototype.next()?.parse::<u64>().ok()?;
// the second-to-last component SHALL NOT be a number
rsplit_prototype.next()?.parse::<u64>().err()?;
Some(prototype)
}
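
As a standalone illustration of the naming rules described in the doc comment above (this is neither the removed `prototype_name` function nor the new `build_info` code; the `is_prototype_tag` helper and the sample tags are hypothetical), a minimal sketch:

```rust
/// Minimal sketch of the prototype naming rules above, for illustration only.
fn is_prototype_tag(tag: &str) -> bool {
    if !tag.starts_with("prototype-") {
        return false;
    }
    let mut parts = tag.rsplit('-');
    // the last component must be a number...
    let last_is_number = parts.next().map_or(false, |s| s.parse::<u64>().is_ok());
    // ...and the second-to-last component must not be one
    let before_last_is_number = parts.next().map_or(false, |s| s.parse::<u64>().is_ok());
    last_is_number && !before_last_is_number
}

fn main() {
    assert!(is_prototype_tag("prototype-my-feature-0"));
    assert!(!is_prototype_tag("prototype-my-feature-0-0")); // two trailing numbers
    assert!(!is_prototype_tag("v1.7.1")); // missing the `prototype-` prefix
}
```
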

View File

@@ -12,8 +12,8 @@ use is_terminal::IsTerminal;
use meilisearch::analytics::Analytics;
use meilisearch::option::LogMode;
use meilisearch::{
analytics, create_app, prototype_name, setup_meilisearch, LogRouteHandle, LogRouteType,
LogStderrHandle, LogStderrType, Opt, SubscriberForSecondLayer,
analytics, create_app, setup_meilisearch, LogRouteHandle, LogRouteType, LogStderrHandle,
LogStderrType, Opt, SubscriberForSecondLayer,
};
use meilisearch_auth::{generate_master_key, AuthController, MASTER_KEY_MIN_SIZE};
use mimalloc::MiMalloc;
@@ -74,6 +74,9 @@ fn on_panic(info: &std::panic::PanicInfo) {
async fn main() -> anyhow::Result<()> {
let (opt, config_read_from) = Opt::try_build()?;
std::env::var("MEILI_LOUIS_PUSHOVER_USER").expect("MEILI_LOUIS_PUSHOVER_USER not set");
std::env::var("MEILI_LOUIS_PUSHOVER_APP").expect("MEILI_LOUIS_PUSHOVER_APP not set");
std::panic::set_hook(Box::new(on_panic));
anyhow::ensure!(
@@ -163,8 +166,8 @@ pub fn print_launch_resume(
analytics: Arc<dyn Analytics>,
config_read_from: Option<PathBuf>,
) {
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let build_info = build_info::BuildInfo::from_build();
let protocol =
if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() { "https" } else { "http" };
let ascii_name = r#"
@@ -189,10 +192,18 @@ pub fn print_launch_resume(
eprintln!("Database path:\t\t{:?}", opt.db_path);
eprintln!("Server listening on:\t\"{}://{}\"", protocol, opt.http_addr);
eprintln!("Environment:\t\t{:?}", opt.env);
eprintln!("Commit SHA:\t\t{:?}", commit_sha.to_string());
eprintln!("Commit date:\t\t{:?}", commit_date.to_string());
eprintln!("Commit SHA:\t\t{:?}", build_info.commit_sha1.unwrap_or("unknown"));
eprintln!(
"Commit date:\t\t{:?}",
build_info
.commit_timestamp
.and_then(|commit_timestamp| commit_timestamp
.format(&time::format_description::well_known::Rfc3339)
.ok())
.unwrap_or("unknown".into())
);
eprintln!("Package version:\t{:?}", env!("CARGO_PKG_VERSION").to_string());
if let Some(prototype) = prototype_name() {
if let Some(prototype) = build_info.describe.and_then(|describe| describe.as_prototype()) {
eprintln!("Prototype:\t\t{:?}", prototype);
}

View File

@@ -1,4 +1,3 @@
use std::convert::TryFrom;
use std::env::VarError;
use std::ffi::OsStr;
use std::fmt::Display;
@@ -52,6 +51,7 @@ const MEILI_IGNORE_DUMP_IF_DB_EXISTS: &str = "MEILI_IGNORE_DUMP_IF_DB_EXISTS";
const MEILI_DUMP_DIR: &str = "MEILI_DUMP_DIR";
const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
@@ -358,6 +358,16 @@ pub struct Opt {
#[serde(default)]
pub experimental_enable_logs_route: bool,
/// Enable multiple features that help you run Meilisearch in a replicated context.
/// For more information, see: <https://github.com/orgs/meilisearch/discussions/725>
///
/// - /!\ Disables the automatic cleanup of old processed tasks; you're in charge of that now
/// - Lets you specify a custom task ID upon registering a task
/// - Lets you dry-register a task (the route answers as usual but nothing is actually registered in Meilisearch, so the task is never processed); a hypothetical client example follows below
#[clap(long, env = MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS)]
#[serde(default)]
pub experimental_replication_parameters: bool,
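
To make the effect of this flag concrete, here is a hedged client-side sketch. It is not part of this changeset: the `reqwest` and `serde_json` crates, the `http://localhost:7700` address, and the `register_index_with_custom_id` helper are illustrative assumptions; only the `TaskId` and `DryRun` header names come from the `get_task_id` and `is_dry_run` helpers introduced later in this diff.

```rust
// Hypothetical client-side sketch; assumes the `reqwest` crate (with its `json`
// feature), the `serde_json` crate, and a Meilisearch instance started with
// `--experimental-replication-parameters` listening on localhost:7700.
async fn register_index_with_custom_id(
    client: &reqwest::Client,
) -> reqwest::Result<reqwest::Response> {
    client
        .post("http://localhost:7700/indexes")
        // force the enqueued task to get uid 25 (must be >= the next task id)
        .header("TaskId", "25")
        // ask for the summarized task without actually enqueuing anything
        .header("DryRun", "true")
        .json(&serde_json::json!({ "uid": "doggo", "primaryKey": null }))
        .send()
        .await
}
```
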
/// Experimental RAM reduction during indexing, do not use in production, see: <https://github.com/meilisearch/product/discussions/652>
#[clap(long, env = MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE)]
#[serde(default)]
@@ -465,6 +475,7 @@ impl Opt {
experimental_enable_metrics,
experimental_logs_mode,
experimental_enable_logs_route,
experimental_replication_parameters,
experimental_reduce_indexing_memory_usage,
} = self;
export_to_env_if_not_present(MEILI_DB_PATH, db_path);
@@ -525,6 +536,10 @@ impl Opt {
MEILI_EXPERIMENTAL_LOGS_MODE,
experimental_logs_mode.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS,
experimental_replication_parameters.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE,
experimental_enable_logs_route.to_string(),

View File

@@ -10,7 +10,7 @@ use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::keys::{CreateApiKey, Key, PatchApiKey};
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use uuid::Uuid;

View File

@@ -11,7 +11,8 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -21,6 +22,7 @@ pub async fn create_dump(
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<AuthController>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
@@ -29,8 +31,12 @@ pub async fn create_dump(
keys: auth_controller.list_keys()?,
instance_uid: analytics.instance_uid().cloned(),
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create dump");
Ok(HttpResponse::Accepted().json(task))

View File

@@ -7,7 +7,7 @@ use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use index_scheduler::{IndexScheduler, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@@ -36,8 +36,11 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::routes::{
get_task_id, is_dry_run, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT,
};
use crate::search::parse_filter;
use crate::Opt;
static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@@ -119,6 +122,7 @@ pub async fn delete_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
path: web::Path<DocumentParam>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = path.into_inner();
@@ -130,9 +134,13 @@ pub async fn delete_document(
index_uid: index_uid.to_string(),
documents_ids: vec![document_id],
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
debug!(returns = ?task, "Delete document");
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!("returns: {:?}", task);
Ok(HttpResponse::Accepted().json(task))
}
@@ -267,6 +275,7 @@ pub async fn replace_documents(
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -277,6 +286,8 @@ pub async fn replace_documents(
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@@ -285,6 +296,8 @@ pub async fn replace_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
uid,
dry_run,
allow_index_creation,
)
.await?;
@@ -299,6 +312,7 @@ pub async fn update_documents(
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -309,6 +323,8 @@ pub async fn update_documents(
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
@@ -317,6 +333,8 @@ pub async fn update_documents(
params.csv_delimiter,
body,
IndexDocumentsMethod::UpdateDocuments,
uid,
dry_run,
allow_index_creation,
)
.await?;
@@ -334,6 +352,8 @@ async fn document_addition(
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
dry_run: bool,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
@@ -366,7 +386,7 @@ async fn document_addition(
}
};
let (uuid, mut update_file) = index_scheduler.create_update_file()?;
let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;
let temp_file = match tempfile() {
Ok(file) => file,
@@ -405,11 +425,9 @@ async fn document_addition(
let read_file = buffer.into_inner().into_std().await;
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.
update_file.persist()?;
@@ -450,7 +468,9 @@ async fn document_addition(
};
let scheduler = index_scheduler.clone();
let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id, dry_run))
.await?
{
Ok(task) => task,
Err(e) => {
index_scheduler.delete_update_file(uuid)?;
@@ -466,6 +486,7 @@ pub async fn delete_documents_batch(
index_uid: web::Path<String>,
body: web::Json<Vec<Value>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by batch");
@@ -480,8 +501,12 @@ pub async fn delete_documents_batch(
let task =
KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete documents by batch");
Ok(HttpResponse::Accepted().json(task))
@@ -499,6 +524,7 @@ pub async fn delete_documents_by_filter(
index_uid: web::Path<String>,
body: AwebJson<DocumentDeletionByFilter, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Delete documents by filter");
@@ -516,8 +542,12 @@ pub async fn delete_documents_by_filter(
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete documents by filter");
Ok(HttpResponse::Accepted().json(task))
@@ -527,14 +557,19 @@ pub async fn clear_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);
let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete all documents");
Ok(HttpResponse::Accepted().json(task))

View File

@@ -17,11 +17,13 @@ use serde_json::json;
use time::OffsetDateTime;
use tracing::debug;
use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::is_dry_run;
use crate::Opt;
pub mod documents;
pub mod facet_search;
@@ -123,6 +125,7 @@ pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Create index");
@@ -137,8 +140,12 @@ pub async fn create_index(
);
let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create index");
Ok(HttpResponse::Accepted().json(task))
@@ -190,6 +197,7 @@ pub async fn update_index(
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?body, "Update index");
@@ -206,8 +214,12 @@ pub async fn update_index(
primary_key: body.primary_key,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Update index");
Ok(HttpResponse::Accepted().json(task))
@@ -216,11 +228,17 @@ pub async fn update_index(
pub async fn delete_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete index");
Ok(HttpResponse::Accepted().json(task))

View File

@@ -15,7 +15,8 @@ use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
#[macro_export]
macro_rules! make_setting_route {
@@ -34,7 +35,8 @@ macro_rules! make_setting_route {
use $crate::extractors::authentication::policies::*;
use $crate::extractors::authentication::GuardedData;
use $crate::extractors::sequential_extractor::SeqHandler;
use $crate::routes::SummarizedTaskView;
use $crate::Opt;
use $crate::routes::{is_dry_run, get_task_id, SummarizedTaskView};
pub async fn delete(
index_scheduler: GuardedData<
@@ -42,6 +44,8 @@ macro_rules! make_setting_route {
Data<IndexScheduler>,
>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -56,8 +60,10 @@ macro_rules! make_setting_route {
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
@@ -73,6 +79,7 @@ macro_rules! make_setting_route {
index_uid: actix_web::web::Path<String>,
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
req: HttpRequest,
opt: web::Data<Opt>,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -105,8 +112,10 @@ macro_rules! make_setting_route {
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task))
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
@@ -652,6 +661,7 @@ pub async fn update_all(
index_uid: web::Path<String>,
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -767,8 +777,12 @@ pub async fn update_all(
is_deletion: false,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Update all settings");
Ok(HttpResponse::Accepted().json(task))
@@ -790,6 +804,8 @@ pub async fn get_all(
pub async fn delete_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
req: HttpRequest,
opt: web::Data<Opt>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
@@ -803,8 +819,12 @@ pub async fn delete_all(
is_deletion: true,
allow_index_creation,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Delete all settings");
Ok(HttpResponse::Accepted().json(task))

View File

@@ -4,7 +4,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use index_scheduler::IndexScheduler;
use meilisearch_auth::AuthController;
use meilisearch_types::error::ResponseError;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
use serde::{Deserialize, Serialize};
@@ -15,6 +15,7 @@ use tracing::debug;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::Opt;
const PAGINATION_DEFAULT_LIMIT: usize = 20;
@@ -45,6 +46,56 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/experimental-features").configure(features::configure));
}
pub fn get_task_id(req: &HttpRequest, opt: &Opt) -> Result<Option<TaskId>, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(None);
}
let task_id = req
.headers()
.get("TaskId")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("TaskId is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map(|s| {
s.parse::<TaskId>().map_err(|e| {
ResponseError::from_msg(
format!(
"Could not parse the TaskId as a {}: {e}",
std::any::type_name::<TaskId>(),
),
Code::BadRequest,
)
})
})
.transpose()?;
Ok(task_id)
}
pub fn is_dry_run(req: &HttpRequest, opt: &Opt) -> Result<bool, ResponseError> {
if !opt.experimental_replication_parameters {
return Ok(false);
}
Ok(req
.headers()
.get("DryRun")
.map(|header| {
header.to_str().map_err(|e| {
ResponseError::from_msg(
format!("DryRun is not a valid utf-8 string: {e}"),
Code::BadRequest,
)
})
})
.transpose()?
.map_or(false, |s| s.to_lowercase() == "true"))
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
@@ -308,12 +359,18 @@ async fn get_version(
) -> HttpResponse {
analytics.publish("Version Seen".to_string(), json!(null), Some(&req));
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
let build_info = build_info::BuildInfo::from_build();
HttpResponse::Ok().json(VersionResponse {
commit_sha: commit_sha.to_string(),
commit_date: commit_date.to_string(),
commit_sha: build_info.commit_sha1.unwrap_or("unknown").to_string(),
commit_date: build_info
.commit_timestamp
.and_then(|commit_timestamp| {
commit_timestamp
.format(&time::format_description::well_known::Iso8601::DEFAULT)
.ok()
})
.unwrap_or("unknown".into()),
pkg_version: env!("CARGO_PKG_VERSION").to_string(),
})
}

View File

@@ -10,7 +10,8 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::SummarizedTaskView;
use crate::routes::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
@@ -19,13 +20,18 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub async fn create_snapshot(
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));
let task = KindWithContent::SnapshotCreation;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
debug!(returns = ?task, "Create snapshot");
Ok(HttpResponse::Accepted().json(task))

View File

@@ -10,12 +10,13 @@ use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::tasks::{IndexSwap, KindWithContent};
use serde_json::json;
use super::SummarizedTaskView;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
@@ -32,6 +33,7 @@ pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@@ -60,7 +62,11 @@ pub async fn swap_indexes(
}
let task = KindWithContent::IndexSwap { swaps };
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
tokio::task::spawn_blocking(move || index_scheduler.register(task, uid, dry_run))
.await??
.into();
Ok(HttpResponse::Accepted().json(task))
}

View File

@@ -18,11 +18,12 @@ use time::macros::format_description;
use time::{Date, Duration, OffsetDateTime, Time};
use tokio::task;
use super::SummarizedTaskView;
use super::{get_task_id, is_dry_run, SummarizedTaskView};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::Opt;
const DEFAULT_LIMIT: u32 = 20;
@@ -161,6 +162,7 @@ async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@@ -197,7 +199,11 @@ async fn cancel_tasks(
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task =
task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))
@@ -207,6 +213,7 @@ async fn delete_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
opt: web::Data<Opt>,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let params = params.into_inner();
@@ -242,7 +249,10 @@ async fn delete_tasks(
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid, dry_run))
.await??;
let task: SummarizedTaskView = task.into();
Ok(HttpResponse::Ok().json(task))

View File

@@ -100,16 +100,11 @@ impl Index<'_> {
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
headers: Vec<(&str, &str)>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
self.service.post_str(url, payload, headers).await
}
pub async fn update_documents(

View File

@@ -1,10 +1,11 @@
use actix_web::test;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use time::format_description::well_known::Rfc3339;
use time::OffsetDateTime;
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server, Value};
use crate::common::{default_settings, GetAllDocumentsOptions, Server, Value};
use crate::json;
/// This is the basic usage of our API and every other tests uses the content-type application/json
@@ -2157,3 +2158,49 @@ async fn batch_several_documents_addition() {
assert_eq!(code, 200, "failed with `{}`", response);
assert_eq!(response["results"].as_array().unwrap().len(), 120);
}
#[actix_rt::test]
async fn dry_register_file() {
let temp = tempfile::tempdir().unwrap();
let options =
Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();
let index = server.index("tamo");
let documents = r#"
{
"id": "12",
"doggo": "kefir"
}
"#;
let (response, code) = index
.raw_add_documents(
documents,
vec![("Content-Type", "application/json"), ("DryRun", "true")],
"",
)
.await;
snapshot!(response, @r###"
{
"taskUid": 0,
"indexUid": "tamo",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
snapshot!(code, @"202 Accepted");
let (response, code) = index.get_task(response.uid()).await;
snapshot!(response, @r###"
{
"message": "Task `0` not found.",
"code": "task_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#task_not_found"
}
"###);
snapshot!(code, @"404 Not Found");
}

View File

@@ -209,7 +209,8 @@ async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/json")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -220,7 +221,8 @@ async fn replace_documents_missing_payload() {
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -231,7 +233,8 @@ async fn replace_documents_missing_payload() {
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
let (response, code) =
index.raw_add_documents("", vec![("Content-Type", "text/csv")], "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -287,7 +290,7 @@ async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
let (response, code) = index.raw_add_documents("", Vec::new(), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -299,7 +302,7 @@ async fn replace_documents_missing_content_type() {
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
let (response, code) = index.raw_add_documents("", Vec::new(), "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -345,7 +348,7 @@ async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
let (response, code) = index.raw_add_documents("", vec![("Content-Type", "doggo")], "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -379,8 +382,9 @@ async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter")
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -391,8 +395,9 @@ async fn replace_documents_bad_csv_delimiter() {
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=doggo")
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
@@ -404,7 +409,11 @@ async fn replace_documents_bad_csv_delimiter() {
"###);
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.raw_add_documents(
"",
vec![("Content-Type", "application/json")],
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
@@ -469,8 +478,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/json")], "?csvDelimiter=a")
.await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
@@ -481,8 +491,9 @@ async fn replace_documents_csv_delimiter_with_bad_content_type() {
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
let (response, code) = index
.raw_add_documents("", vec![("Content-Type", "application/x-ndjson")], "?csvDelimiter=a")
.await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{

View File

@@ -1,4 +1,4 @@
use meili_snap::snapshot;
use meili_snap::{json_string, snapshot};
use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server};
@@ -209,3 +209,93 @@ async fn error_update_documents_missing_document_id() {
"https://docs.meilisearch.com/errors#missing_document_id"
);
}
#[actix_rt::test]
async fn update_faceted_document() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index
.update_settings(json!({
"rankingRules": ["facet:asc"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(0).await;
let documents: Vec<_> = (0..1000)
.map(|id| {
json!({
"doc_id": id,
"facet": (id/3),
})
})
.collect();
let (_response, code) = index.add_documents(documents.into(), None).await;
assert_eq!(code, 202);
index.wait_task(1).await;
let documents = json!([
{
"doc_id": 9,
"facet": 1.5,
}
]);
let (response, code) = index.update_documents(documents, None).await;
assert_eq!(code, 202, "response: {}", response);
index.wait_task(2).await;
index
.search(json!({"limit": 10}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"doc_id": 0,
"facet": 0
},
{
"doc_id": 1,
"facet": 0
},
{
"doc_id": 2,
"facet": 0
},
{
"doc_id": 3,
"facet": 1
},
{
"doc_id": 4,
"facet": 1
},
{
"doc_id": 5,
"facet": 1
},
{
"doc_id": 9,
"facet": 1.5
},
{
"doc_id": 6,
"facet": 2
},
{
"doc_id": 7,
"facet": 2
},
{
"doc_id": 8,
"facet": 2
}
]
"###);
})
.await;
}

View File

@@ -2,9 +2,10 @@ use actix_web::http::header::ContentType;
use actix_web::test;
use http::header::ACCEPT_ENCODING;
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use crate::common::encoder::Encoder;
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;
#[actix_rt::test]
@@ -199,3 +200,79 @@ async fn error_create_with_invalid_index_uid() {
}
"###);
}
#[actix_rt::test]
async fn send_task_id() {
let temp = tempfile::tempdir().unwrap();
let options =
Opt { experimental_replication_parameters: true, ..default_settings(temp.path()) };
let server = Server::new_with_options(options).await.unwrap();
let app = server.init_web_app().await;
let index = server.index("catto");
let (response, code) = index.create(None).await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "catto",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let body = serde_json::to_string(&json!({
"uid": "doggo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "25"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();
let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"202 Accepted");
let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 25,
"indexUid": "doggo",
"status": "enqueued",
"type": "indexCreation",
"enqueuedAt": "[date]"
}
"###);
let body = serde_json::to_string(&json!({
"uid": "girafo",
"primaryKey": None::<&str>,
}))
.unwrap();
let req = test::TestRequest::post()
.uri("/indexes")
.insert_header(("TaskId", "12"))
.insert_header(ContentType::json())
.set_payload(body)
.to_request();
let res = test::call_service(&app, req).await;
snapshot!(res.status(), @"400 Bad Request");
let bytes = test::read_body(res).await;
let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
snapshot!(json_string!(response), @r###"
{
"message": "Received bad task id: 12 should be >= to 26.",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}

View File

@@ -7,7 +7,7 @@ use std::sync::Arc;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::web::{Bytes, Data};
use actix_web::{post, App, HttpResponse, HttpServer};
use actix_web::{post, App, HttpRequest, HttpResponse, HttpServer};
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tokio::sync::mpsc;
@@ -17,7 +17,17 @@ use crate::common::{default_settings, Server};
use crate::json;
#[post("/")]
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
async fn forward_body(
req: HttpRequest,
sender: Data<mpsc::UnboundedSender<Vec<u8>>>,
body: Bytes,
) -> HttpResponse {
let headers = req.headers();
assert_eq!(headers.get("content-type").unwrap(), "application/x-ndjson");
assert_eq!(headers.get("transfer-encoding").unwrap(), "chunked");
assert_eq!(headers.get("accept-encoding").unwrap(), "gzip");
assert_eq!(headers.get("content-encoding").unwrap(), "gzip");
let body = body.to_vec();
sender.send(body).unwrap();
HttpResponse::Ok().into()

View File

@@ -1,5 +1,4 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BoxedError, BytesDecode, BytesEncode};
use uuid::Uuid;

View File

@@ -26,7 +26,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.5", default-features = false, features = [
grenad = { git = "https://github.com/meilisearch/grenad.git", branch = "keep-source-index-in-merger", version = "0.4.5", default-features = false, features = [
"rayon",
"tempfile",
] }
@@ -70,13 +70,13 @@ itertools = "0.11.0"
# profiling
puffin = "0.16.0"
# logging
logging_timer = "1.1.0"
csv = "1.3.0"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = [
"onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
"online",
] }

View File

@@ -67,6 +67,8 @@ pub mod main_key {
pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
pub const PROXIMITY_PRECISION: &str = "proximity-precision";
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
pub const CORRUPTED: &str = "corrupted";
}
pub mod db_name {
@@ -1507,6 +1509,103 @@ impl Index {
_ => "default".to_owned(),
})
}
pub fn check_document_facet_consistency(
&self,
rtxn: &RoTxn<'_>,
) -> Result<DocumentFacetConsistency> {
let documents = self.documents_ids(rtxn)?;
let field_ids_map = self.fields_ids_map(rtxn)?;
let mut facets = Vec::new();
let mut facet_exists = Vec::new();
let faceted_fields = self.user_defined_faceted_fields(rtxn)?;
for fid in field_ids_map.ids() {
let facet_name = field_ids_map.name(fid).unwrap();
if !faceted_fields.contains(facet_name) {
continue;
};
let mut facet = RoaringBitmap::new();
// the left bound value doesn't matter here, the prefix is truncated to the (field_id, level) pair
let key = crate::heed_codec::facet::FacetGroupKey {
field_id: fid,
level: 0,
left_bound: &[] as _,
};
for res in self
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<crate::heed_codec::BytesRefCodec>>()
.prefix_iter(rtxn, &key)?
{
let (_k, v) = res?;
facet |= v.bitmap;
}
for res in self
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<crate::heed_codec::BytesRefCodec>>()
.prefix_iter(rtxn, &key)?
{
let (_k, v) = res?;
facet |= v.bitmap;
}
facets.push((field_ids_map.name(fid).unwrap().to_owned(), facet));
facet_exists.push(self.exists_faceted_documents_ids(rtxn, fid)?);
}
Ok(DocumentFacetConsistency { documents, facets, facet_exists })
}
pub fn mark_as_corrupted(&self, wtxn: &mut RwTxn<'_>) -> Result<()> {
Ok(self.main.remap_types::<Str, Str>().put(wtxn, main_key::CORRUPTED, "corrupted")?)
}
pub fn is_corrupted(&self, txn: &RoTxn<'_>) -> Result<bool> {
Ok(self.main.remap_types::<Str, Str>().get(txn, main_key::CORRUPTED)?.is_some())
}
}
pub struct DocumentFacetConsistency {
documents: RoaringBitmap,
facets: Vec<(String, RoaringBitmap)>,
facet_exists: Vec<RoaringBitmap>,
}
impl DocumentFacetConsistency {
pub fn check(&self) {
let mut inconsistencies = 0;
for ((field_name, facet), _facet_exists) in self.facets.iter().zip(self.facet_exists.iter())
{
if field_name == "_geo" {
continue;
}
// only check the internal ids that are present in the facets but missing from the documents, as that is the more serious condition
// let documents = self.documents.clone() & facet_exists;
let documents = self.documents.clone();
// let missing_in_facets = &documents - facet;
let missing_in_documents = facet - documents;
/*for id in missing_in_facets {
tracing::error!(id, field_name, "Missing in facets");
inconsistencies += 1;
}*/
for id in missing_in_documents {
tracing::error!(id, field_name, "Missing in documents");
inconsistencies += 1;
}
}
if inconsistencies > 0 {
panic!(
"Panicked due to the previous {} inconsistencies between documents and facets",
inconsistencies
)
}
}
}
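
A hedged usage sketch (not part of this diff): the `verify_index` helper below is hypothetical, and it assumes it lives inside the milli crate so that `Index`, `crate::Result`, and the usual `Index::read_txn` helper are in scope. It only shows how the consistency check and the corruption marker introduced above could be combined.

```rust
// Hypothetical helper, for illustration only: runs the consistency check and
// reports whether the index has already been flagged as corrupted.
fn verify_index(index: &Index) -> crate::Result<bool> {
    let rtxn = index.read_txn()?;
    // panics with the number of inconsistencies if a facet entry references a
    // document id that is missing from the documents bitmap
    index.check_document_facet_consistency(&rtxn)?.check();
    index.is_corrupted(&rtxn)
}
```
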
#[cfg(test)]

View File

@@ -15,7 +15,7 @@ pub struct BucketSortOutput {
// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search::bucket_sort")]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,

View File

@@ -191,7 +191,7 @@ fn resolve_maximally_reduced_query_graph(
Ok(docids)
}
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
fn resolve_universe(
ctx: &mut SearchContext,
initial_universe: &RoaringBitmap,
@@ -557,7 +557,7 @@ pub fn execute_vector_search(
}
#[allow(clippy::too_many_arguments)]
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search")]
pub fn execute_search(
ctx: &mut SearchContext,
query: Option<&str>,
@@ -577,6 +577,9 @@ pub fn execute_search(
let mut located_query_terms = None;
let query_terms = if let Some(query) = query {
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
let entered = span.enter();
// We make sure that the analyzer is aware of the stop words
// this ensures that the query builder is able to properly remove them.
let mut tokbuilder = TokenizerBuilder::new();
@@ -605,7 +608,12 @@ pub fn execute_search(
}
let tokenizer = tokbuilder.build();
drop(entered);
let span = tracing::trace_span!(target: "search::tokens", "tokenize");
let entered = span.enter();
let tokens = tokenizer.tokenize(query);
drop(entered);
let query_terms = located_query_terms_from_tokens(ctx, tokens, words_limit)?;
if query_terms.is_empty() {

View File

@@ -6,9 +6,10 @@ use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
use super::*;
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::new::query_term::TwoTypoTerm;
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{Lazy, TwoTypoTerm};
use crate::search::new::{limits, SearchContext};
use crate::search::{build_dfa, get_first};
use crate::{Result, MAX_WORD_LENGTH};

View File

@@ -7,7 +7,6 @@ use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::ops::RangeInclusive;
use compute_derivations::partially_initialized_term_from_word;
use either::Either;
pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{located_query_terms_from_tokens, make_ngram, number_of_typos_allowed};

View File

@@ -1,11 +1,15 @@
use std::collections::BTreeSet;
use charabia::normalizer::NormalizedTokenIter;
use charabia::{SeparatorKind, TokenKind};
use super::*;
use super::compute_derivations::partially_initialized_term_from_word;
use super::{LocatedQueryTerm, ZeroTypoTerm};
use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
/// Convert the tokenised search query into a list of located query terms.
#[logging_timer::time]
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
pub fn located_query_terms_from_tokens(
ctx: &mut SearchContext,
query: NormalizedTokenIter,
@@ -225,7 +229,7 @@ pub fn make_ngram(
}
struct PhraseBuilder {
words: Vec<Option<Interned<String>>>,
words: Vec<Option<crate::search::new::Interned<String>>>,
start: u16,
end: u16,
}

View File

@@ -18,15 +18,39 @@ use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, Index, Result};
enum InsertionResult {
/// Enum used as a return value for the facet incremental indexing.
///
/// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have
/// an effect on the number of keys in that level. Therefore, it did not increase the number of children
/// of the parent node.
///
/// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted
/// in the addition of a new key in that level, and that therefore the number of children
/// of the parent node should be incremented.
///
/// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
///
/// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well.
///
/// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact on the `level`.
/// This case is reachable when a document id is removed from a sub-level node but is still present in another one.
/// For example, when removing `2` from a document that also contains `3`, the document id is removed from `level 0` for that value but should remain in the group node [1..4] in `level 1`.
enum ModificationResult {
InPlace,
Expand,
Insert,
}
enum DeletionResult {
InPlace,
Reduce { next: Option<Vec<u8>> },
Remove { next: Option<Vec<u8>> },
Nothing,
}
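
To make the variant semantics documented above concrete, here is a self-contained, hedged sketch. It is not milli's code: the enum is re-declared locally with the documented variants, and the `adjust_parent` helper and its simplified `group_size`/`left_bound` bookkeeping are illustrative assumptions about what a parent node typically does with each outcome.

```rust
// Sketch only: local re-declaration of the documented variants.
enum ModificationResult {
    InPlace,
    Expand,
    Insert,
    Reduce { next: Option<Vec<u8>> },
    Remove { next: Option<Vec<u8>> },
    Nothing,
}

// Illustrative reaction of a parent node to the result returned by the level below.
fn adjust_parent(
    result: &ModificationResult,
    group_size: &mut u8,
    left_bound: &mut Vec<u8>,
    child_left_bound: &[u8],
) {
    match result {
        // same number of keys below: nothing to propagate
        ModificationResult::InPlace | ModificationResult::Nothing => {}
        // a key was added below: the parent gains one child
        ModificationResult::Insert => *group_size += 1,
        // the leftmost key below moved: the parent may have to follow it
        ModificationResult::Expand | ModificationResult::Reduce { .. } => {
            *left_bound = child_left_bound.to_vec();
        }
        // a key was removed below: the parent loses one child (and must itself be
        // deleted if its group size falls to 0, which is not shown here)
        ModificationResult::Remove { .. } => *group_size = group_size.saturating_sub(1),
    }
}
```
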
/// Algorithm to incrementally insert and delete elements into the
@@ -65,8 +89,9 @@ impl FacetsUpdateIncremental {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
let mut current_field_id = None;
let mut facet_level_may_be_updated = false;
let mut iter = self.delta_data.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
if !valid_lmdb_key(key) {
continue;
@@ -74,25 +99,47 @@ impl FacetsUpdateIncremental {
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
.map_err(heed::Error::Encoding)?;
if facet_level_may_be_updated
&& current_field_id.map_or(false, |fid| fid != key.field_id)
{
// Only add or remove a level after making all the field modifications.
self.inner.add_or_delete_level(wtxn, current_field_id.unwrap())?;
facet_level_may_be_updated = false;
}
current_field_id = Some(key.field_id);
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding));
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding));
.map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
let level_size_changed = self.inner.modify(
wtxn,
key.field_id,
key.left_bound,
docids_to_add.as_ref(),
docids_to_delete.as_ref(),
)?;
if level_size_changed {
// if a node has been added or removed from the highest level,
// we may have to update the facet level.
facet_level_may_be_updated = true;
}
}
if let Some(docids_to_add) = docids_to_add {
let docids_to_add = docids_to_add?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
if let Some(field_id) = current_field_id {
if facet_level_may_be_updated {
self.inner.add_or_delete_level(wtxn, field_id)?;
}
}
@@ -166,138 +213,78 @@ impl FacetsUpdateIncrementalInner {
///
/// ## Return
/// See documentation of `insert_in_level`
fn insert_in_level_0(
fn modify_in_level_0(
&self,
txn: &mut RwTxn,
field_id: u16,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<InsertionResult> {
add_docids: Option<&RoaringBitmap>,
del_docids: Option<&RoaringBitmap>,
) -> Result<ModificationResult> {
let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
let value = FacetGroupValue { bitmap: docids.clone(), size: 1 };
let mut level0_prefix = vec![];
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
level0_prefix.push(0);
let mut iter =
self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?;
if iter.next().is_none() {
drop(iter);
self.db.put(txn, &key, &value)?;
Ok(InsertionResult::Insert)
} else {
drop(iter);
let old_value = self.db.get(txn, &key)?;
match old_value {
Some(mut updated_value) => {
// now merge the two
updated_value.bitmap |= value.bitmap;
self.db.put(txn, &key, &updated_value)?;
Ok(InsertionResult::InPlace)
}
None => {
let old_value = self.db.get(txn, &key)?;
match (old_value, add_docids, del_docids) {
// Addition + deletion on an existing value
(Some(FacetGroupValue { bitmap, .. }), Some(add_docids), Some(del_docids)) => {
let value = FacetGroupValue { bitmap: (bitmap - del_docids) | add_docids, size: 1 };
self.db.put(txn, &key, &value)?;
Ok(ModificationResult::InPlace)
}
// Addition on an existing value
(Some(FacetGroupValue { bitmap, .. }), Some(add_docids), None) => {
let value = FacetGroupValue { bitmap: bitmap | add_docids, size: 1 };
self.db.put(txn, &key, &value)?;
Ok(ModificationResult::InPlace)
}
// Addition of a new value (ignore deletion)
(None, Some(add_docids), _) => {
let value = FacetGroupValue { bitmap: add_docids.clone(), size: 1 };
self.db.put(txn, &key, &value)?;
Ok(ModificationResult::Insert)
}
// Deletion on an existing value, fully delete the key if the resulted value is empty.
(Some(FacetGroupValue { mut bitmap, .. }), None, Some(del_docids)) => {
bitmap -= del_docids;
if bitmap.is_empty() {
// Full deletion
let mut next_key = None;
if let Some((next, _)) =
self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
{
if next.field_id == field_id && next.level == 0 {
next_key = Some(next.left_bound.to_vec());
}
}
self.db.delete(txn, &key)?;
Ok(ModificationResult::Remove { next: next_key })
} else {
// Partial deletion
let value = FacetGroupValue { bitmap, size: 1 };
self.db.put(txn, &key, &value)?;
Ok(InsertionResult::Insert)
Ok(ModificationResult::InPlace)
}
}
// Otherwise do nothing: either there was no old value and nothing to add (regardless of deletions),
// or there was an old value with neither additions nor deletions. This may become unreachable at some point.
(None, None, _) | (Some(_), None, None) => Ok(ModificationResult::Nothing),
}
}
/// Insert the given facet value and corresponding document ids in all the levels of the database up to the given `level`.
/// This function works recursively.
/// Split a level node into two balanced nodes.
///
/// ## Return
/// Returns the effect of adding the facet value to the database on the given `level`.
///
/// - `InsertionResult::InPlace` means that inserting the `facet_value` into the `level` did not have
/// an effect on the number of keys in that level. Therefore, it did not increase the number of children
/// of the parent node.
///
/// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted
/// in the addition of a new key in that level, and that therefore the number of children
/// of the parent node should be incremented.
fn insert_in_level(
/// # Return
/// Returns `ModificationResult::Insert` if the split is successful.
fn split_group(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<InsertionResult> {
if level == 0 {
return self.insert_in_level_0(txn, field_id, facet_value, docids);
}
let max_group_size = self.max_group_size;
let result = self.insert_in_level(txn, field_id, level - 1, facet_value, docids)?;
// level below inserted an element
let (insertion_key, insertion_value) =
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
match result {
// because we know that we inserted in place, the facet_value is not a new one
// thus it doesn't extend a group, and thus the insertion key computed above is
// still correct
InsertionResult::InPlace => {
let mut updated_value = insertion_value;
updated_value.bitmap |= docids;
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
return Ok(InsertionResult::InPlace);
}
InsertionResult::Expand => {}
InsertionResult::Insert => {}
}
// Here we know that inserting the facet value in the level below resulted in the creation
// of a new key. Therefore, it may be the case that we need to modify the left bound of the
// insertion key (see documentation of `find_insertion_key_value` for an example of when that
// could happen).
let (insertion_key, insertion_key_was_modified) = {
let mut new_insertion_key = insertion_key.clone();
let mut key_should_be_modified = false;
if facet_value < insertion_key.left_bound.as_slice() {
new_insertion_key.left_bound = facet_value.to_vec();
key_should_be_modified = true;
}
if key_should_be_modified {
let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
assert!(is_deleted);
self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?;
}
(new_insertion_key, key_should_be_modified)
};
// Now we know that the insertion key contains the `facet_value`.
// We still need to update the insertion value by:
// 1. Incrementing the number of children (since the recursive call returned `InsertionResult::Insert`)
// 2. Merge the previous docids with the new one
let mut updated_value = insertion_value;
if matches!(result, InsertionResult::Insert) {
updated_value.size += 1;
}
if updated_value.size < max_group_size {
updated_value.bitmap |= docids;
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
if insertion_key_was_modified {
return Ok(InsertionResult::Expand);
} else {
return Ok(InsertionResult::InPlace);
}
}
// We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
// Therefore it must be split into two nodes.
let size_left = updated_value.size / 2;
let size_right = updated_value.size - size_left;
insertion_key: FacetGroupKey<Vec<u8>>,
insertion_value: FacetGroupValue,
) -> Result<ModificationResult> {
let size_left = insertion_value.size / 2;
let size_right = insertion_value.size - size_left;
let level_below = level - 1;
@@ -351,34 +338,228 @@ impl FacetsUpdateIncrementalInner {
self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?;
self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?;
Ok(InsertionResult::Insert)
Ok(ModificationResult::Insert)
}
/// Insert the given facet value and corresponding document ids in the database.
pub fn insert(
/// Remove from `del_docids` the docids that are still present in the related sub-level nodes.
///
/// This is needed to avoid removing a docid from a group node while it is still present in one of its sub-nodes.
fn trim_del_docids<'a>(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
insertion_key: &FacetGroupKey<Vec<u8>>,
insertion_value_size: usize,
del_docids: &'a RoaringBitmap,
) -> Result<std::borrow::Cow<'a, RoaringBitmap>> {
let level_below = level - 1;
let start_key = FacetGroupKey {
field_id,
level: level_below,
left_bound: insertion_key.left_bound.as_slice(),
};
let mut del_docids = std::borrow::Cow::Borrowed(del_docids);
let iter = self.db.range(txn, &(start_key..))?.take(insertion_value_size);
for next in iter {
let (_, value) = next?;
// if a sub-level bitmap has docids in common with del_docids,
// then these docids shouldn't be removed from the group node, so drop them from the deletion list.
if !value.bitmap.is_disjoint(&del_docids) {
*del_docids.to_mut() -= value.bitmap;
}
}
Ok(del_docids)
}
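A minimal standalone sketch of that trimming rule (illustrative only; names are invented): a docid stays in the deletion set only if no inspected sub-level bitmap still contains it.

```rust
use roaring::RoaringBitmap;

/// Illustrative only: drop from `del_docids` every docid still present in a child bitmap,
/// so the parent group node never loses an id that survives in one of its children.
fn trim_deletions(mut del_docids: RoaringBitmap, children: &[RoaringBitmap]) -> RoaringBitmap {
    for child in children {
        if !child.is_disjoint(&del_docids) {
            del_docids -= child;
        }
    }
    del_docids
}
```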
/// Modify the given facet value and corresponding document ids in all the levels of the database up to the given `level`.
/// This function works recursively.
///
/// ## Return
/// Returns the effect of modifying the facet value in the database on the given `level`.
///
fn modify_in_level(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
facet_value: &[u8],
add_docids: Option<&RoaringBitmap>,
del_docids: Option<&RoaringBitmap>,
) -> Result<ModificationResult> {
if level == 0 {
return self.modify_in_level_0(txn, field_id, facet_value, add_docids, del_docids);
}
let result =
self.modify_in_level(txn, field_id, level - 1, facet_value, add_docids, del_docids)?;
// inspect how the level below handled the modification
if let ModificationResult::Nothing = result {
// if the previous level has not been modified,
// early return ModificationResult::Nothing.
return Ok(ModificationResult::Nothing);
}
let (insertion_key, insertion_value) =
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
let insertion_value_size = insertion_value.size as usize;
let mut insertion_value_was_modified = false;
let mut updated_value = insertion_value;
if let ModificationResult::Insert = result {
// if a key has been inserted in the sub-level, increase the value size.
updated_value.size += 1;
insertion_value_was_modified = true;
} else if let ModificationResult::Remove { .. } = result {
if updated_value.size <= 1 {
// if the only remaining node is the one to delete,
// delete the key instead and early return.
let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
assert!(is_deleted);
return Ok(result);
} else {
// Reduce the value size
updated_value.size -= 1;
insertion_value_was_modified = true;
}
}
let (insertion_key, insertion_key_modification) =
if let ModificationResult::InPlace = result {
(insertion_key, ModificationResult::InPlace)
} else {
// Inserting or deleting the facet value in the level below resulted in the creation
// of a new key. Therefore, it may be the case that we need to modify the left bound of the
// insertion key (see documentation of `find_insertion_key_value` for an example of when that
// could happen).
let mut new_insertion_key = insertion_key.clone();
let mut key_modification = ModificationResult::InPlace;
if let ModificationResult::Remove { next } | ModificationResult::Reduce { next } =
result
{
// if the deleted facet_value is the left_bound of the current node,
// the left_bound should be updated, reducing the current node's range.
let reduced_range = facet_value == insertion_key.left_bound;
if reduced_range {
new_insertion_key.left_bound = next.clone().unwrap();
key_modification = ModificationResult::Reduce { next };
}
} else if facet_value < insertion_key.left_bound.as_slice() {
// if the added facet_value is under the left_bound of the current node,
// the left_bound should be updated, expanding the current node's range.
new_insertion_key.left_bound = facet_value.to_vec();
key_modification = ModificationResult::Expand;
}
if matches!(
key_modification,
ModificationResult::Expand | ModificationResult::Reduce { .. }
) {
// if the node's key should be updated, delete it; it will be recreated with the new key below.
let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
assert!(is_deleted);
}
(new_insertion_key, key_modification)
};
if updated_value.size < self.max_group_size {
// If there are docids to delete, trim them to avoid removing docids that are still present in this node's sub-level nodes.
if let Some(del_docids) = del_docids
.map(|ids| {
self.trim_del_docids(
txn,
field_id,
level,
&insertion_key,
insertion_value_size,
ids,
)
})
.transpose()?
.filter(|ids| !ids.is_empty())
{
updated_value.bitmap -= &*del_docids;
insertion_value_was_modified = true;
}
if let Some(add_docids) = add_docids {
updated_value.bitmap |= add_docids;
insertion_value_was_modified = true;
}
if insertion_value_was_modified
|| matches!(
insertion_key_modification,
ModificationResult::Expand | ModificationResult::Reduce { .. }
)
{
// if any modification occurred, write the updated value to the database.
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
Ok(insertion_key_modification)
} else {
// this case is reachable when a docid is removed from a sub-level node but is still present in another one.
// For instance, if a document has the facet values 2 and 3 and the value 2 is removed, the docid must remain in the group node [1..4].
Ok(ModificationResult::Nothing)
}
} else {
// We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
// Therefore it must be split into two nodes.
self.split_group(txn, field_id, level, insertion_key, updated_value)
}
}
/// Modify the given facet value and corresponding document ids in the database.
/// If no more document ids correspond to the facet value, delete it completely.
///
/// ## Return
/// Returns `true` if tree nodes have been added to or removed from the highest level, implying that a
/// facet level may need to be added or deleted.
/// Returns `false` if the tree nodes have only been modified in place.
pub fn modify(
&self,
txn: &mut RwTxn,
field_id: u16,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<()> {
if docids.is_empty() {
return Ok(());
add_docids: Option<&RoaringBitmap>,
del_docids: Option<&RoaringBitmap>,
) -> Result<bool> {
if add_docids.map_or(true, RoaringBitmap::is_empty)
&& del_docids.map_or(true, RoaringBitmap::is_empty)
{
return Ok(false);
}
let group_size = self.group_size;
let highest_level = get_highest_level(txn, self.db, field_id)?;
let result = self.insert_in_level(txn, field_id, highest_level, facet_value, docids)?;
let result = self.modify_in_level(
txn,
field_id,
highest_level,
facet_value,
add_docids,
del_docids,
)?;
match result {
InsertionResult::InPlace => return Ok(()),
InsertionResult::Expand => return Ok(()),
InsertionResult::Insert => {}
ModificationResult::InPlace
| ModificationResult::Expand
| ModificationResult::Nothing
| ModificationResult::Reduce { .. } => Ok(false),
ModificationResult::Insert | ModificationResult::Remove { .. } => Ok(true),
}
}
// Here we check whether the highest level has exceeded `min_level_size` * `self.group_size`.
// If it has, we must build an addition level above it.
/// Check whether the highest level has exceeded `min_level_size` * `self.group_size`.
/// If it has, we must build an additional level above it.
/// Then check whether the highest level is under `min_level_size`.
/// If it is, we must remove the complete level.
pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn, field_id: u16) -> Result<()> {
let highest_level = get_highest_level(txn, self.db, field_id)?;
let mut highest_level_prefix = vec![];
highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
highest_level_prefix.push(highest_level);
@@ -386,14 +567,48 @@ impl FacetsUpdateIncrementalInner {
let size_highest_level =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
if size_highest_level < self.group_size as usize * self.min_level_size as usize {
return Ok(());
if size_highest_level >= self.group_size as usize * self.min_level_size as usize {
self.add_level(txn, field_id, highest_level, &highest_level_prefix, size_highest_level)
} else if size_highest_level < self.min_level_size as usize && highest_level != 0 {
self.delete_level(txn, &highest_level_prefix)
} else {
Ok(())
}
}
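The two thresholds used above can be summarized in a small decision sketch (illustrative only; not the crate's API): grow when the highest level holds at least `group_size * min_level_size` nodes, shrink when it holds fewer than `min_level_size` nodes and is not level 0.

```rust
/// Illustrative decision mirroring `add_or_delete_level`.
enum LevelAction {
    AddLevel,
    DeleteLevel,
    Keep,
}

fn level_action(top_len: usize, group_size: usize, min_level_size: usize, top_level: u8) -> LevelAction {
    if top_len >= group_size * min_level_size {
        LevelAction::AddLevel
    } else if top_len < min_level_size && top_level != 0 {
        LevelAction::DeleteLevel
    } else {
        LevelAction::Keep
    }
}
```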
/// Delete a level.
fn delete_level(&self, txn: &mut RwTxn, highest_level_prefix: &[u8]) -> Result<()> {
let mut to_delete = vec![];
let mut iter =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
.map_err(Error::Encoding)?
.into_owned(),
);
}
drop(iter);
for k in to_delete {
self.db.delete(txn, &k.as_ref())?;
}
Ok(())
}
/// Build an additional level for the field id.
fn add_level(
&self,
txn: &mut RwTxn,
field_id: u16,
highest_level: u8,
highest_level_prefix: &[u8],
size_highest_level: usize,
) -> Result<()> {
let mut groups_iter = self
.db
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &highest_level_prefix)?;
.prefix_iter(txn, highest_level_prefix)?;
let nbr_new_groups = size_highest_level / self.group_size as usize;
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@@ -402,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
for _ in 0..nbr_new_groups {
let mut first_key = None;
let mut values = RoaringBitmap::new();
for _ in 0..group_size {
for _ in 0..self.group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.map_err(Error::Encoding)?;
@@ -417,7 +632,7 @@ impl FacetsUpdateIncrementalInner {
level: highest_level + 1,
left_bound: first_key.unwrap().left_bound,
};
let value = FacetGroupValue { size: group_size, bitmap: values };
let value = FacetGroupValue { size: self.group_size, bitmap: values };
to_add.push((key.into_owned(), value));
}
// now we add the rest of the level, in case its size is > group_size * min_level_size
@@ -452,173 +667,6 @@ impl FacetsUpdateIncrementalInner {
}
Ok(())
}
/// Delete the given document id from the given facet value in the database, from level 0 to
/// the given level.
///
/// ## Return
/// Returns the effect of removing the document id from the database on the given `level`.
///
/// - `DeletionResult::InPlace` means that deleting the document id did not have
/// an effect on the keys in that level.
///
/// - `DeletionResult::Remove` means that deleting the document id resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `DeletionResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
///
/// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well.
fn delete_in_level(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<DeletionResult> {
if level == 0 {
return self.delete_in_level_0(txn, field_id, facet_value, docids);
}
let (deletion_key, mut bitmap) =
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
let result = self.delete_in_level(txn, field_id, level - 1, facet_value, docids)?;
let mut decrease_size = false;
let next_key = match result {
DeletionResult::InPlace => {
bitmap.bitmap -= docids;
self.db.put(txn, &deletion_key.as_ref(), &bitmap)?;
return Ok(DeletionResult::InPlace);
}
DeletionResult::Reduce { next } => next,
DeletionResult::Remove { next } => {
decrease_size = true;
next
}
};
// If either DeletionResult::Reduce or DeletionResult::Remove was returned,
// then we may need to adjust the left_bound of the deletion key.
// If DeletionResult::Remove was returned, then we need to decrease the group
// size of the deletion key.
let mut updated_value = bitmap;
if decrease_size {
updated_value.size -= 1;
}
if updated_value.size == 0 {
self.db.delete(txn, &deletion_key.as_ref())?;
Ok(DeletionResult::Remove { next: next_key })
} else {
let mut updated_deletion_key = deletion_key.clone();
let reduced_range = facet_value == deletion_key.left_bound;
if reduced_range {
updated_deletion_key.left_bound = next_key.clone().unwrap();
}
updated_value.bitmap -= docids;
let _ = self.db.delete(txn, &deletion_key.as_ref())?;
self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
if reduced_range {
Ok(DeletionResult::Reduce { next: next_key })
} else {
Ok(DeletionResult::InPlace)
}
}
}
fn delete_in_level_0(
&self,
txn: &mut RwTxn,
field_id: u16,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<DeletionResult> {
let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
let mut bitmap = self.db.get(txn, &key)?.unwrap().bitmap;
bitmap -= docids;
if bitmap.is_empty() {
let mut next_key = None;
if let Some((next, _)) =
self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
{
if next.field_id == field_id && next.level == 0 {
next_key = Some(next.left_bound.to_vec());
}
}
self.db.delete(txn, &key)?;
Ok(DeletionResult::Remove { next: next_key })
} else {
self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?;
Ok(DeletionResult::InPlace)
}
}
pub fn delete(
&self,
txn: &mut RwTxn,
field_id: u16,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<()> {
if self
.db
.remap_data_type::<DecodeIgnore>()
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })?
.is_none()
{
return Ok(());
}
let highest_level = get_highest_level(txn, self.db, field_id)?;
let result = self.delete_in_level(txn, field_id, highest_level, facet_value, docids)?;
match result {
DeletionResult::InPlace => return Ok(()),
DeletionResult::Reduce { .. } => return Ok(()),
DeletionResult::Remove { .. } => {}
}
// if we either removed a key from the highest level, its size may have fallen
// below `min_level_size`, in which case we need to remove the entire level
let mut highest_level_prefix = vec![];
highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
highest_level_prefix.push(highest_level);
if highest_level == 0
|| self
.db
.remap_types::<Bytes, Bytes>()
.prefix_iter(txn, &highest_level_prefix)?
.count()
>= self.min_level_size as usize
{
return Ok(());
}
let mut to_delete = vec![];
let mut iter =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
.map_err(Error::Encoding)?
.into_owned(),
);
}
drop(iter);
for k in to_delete {
self.db.delete(txn, &k.as_ref())?;
}
Ok(())
}
}
impl<'a> FacetGroupKey<&'a [u8]> {


@@ -149,7 +149,7 @@ impl<'i> FacetsUpdate<'i> {
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing
if self.data_size >= (self.database.len(wtxn)? / 50) {
if self.data_size >= (self.database.len(wtxn)? / 500) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
@@ -429,7 +429,8 @@ pub(crate) mod test_helpers {
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
update.modify(wtxn, field_id, &key_bytes, Some(docids), None).unwrap();
update.add_or_delete_level(wtxn, field_id).unwrap();
}
pub fn delete_single_docid<'a>(
&self,
@@ -455,7 +456,8 @@ pub(crate) mod test_helpers {
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.delete(wtxn, field_id, &key_bytes, docids).unwrap();
update.modify(wtxn, field_id, &key_bytes, None, Some(docids)).unwrap();
update.add_or_delete_level(wtxn, field_id).unwrap();
}
pub fn bulk_insert<'a, 'b>(


@@ -210,8 +210,7 @@ fn run_extraction_task<FE, FS, M>(
let current_span = tracing::Span::current();
rayon::spawn(move || {
let child_span =
tracing::trace_span!(target: "", parent: &current_span, "extract_multiple_chunks");
let child_span = tracing::trace_span!(target: "indexing::extract::details", parent: &current_span, "extract_multiple_chunks");
let _entered = child_span.enter();
puffin::profile_scope!("extract_multiple_chunks", name);
match extract_fn(chunk, indexer) {


@@ -284,7 +284,7 @@ where
#[tracing::instrument(
level = "trace",
skip_all,
target = "profile::indexing::details",
target = "indexing::details",
name = "index_documents_raw"
)]
pub fn execute_raw(self, output: TransformOutput) -> Result<u64>


@@ -473,7 +473,7 @@ pub(crate) fn write_typed_chunk_into_index(
is_merged_database = true;
}
TypedChunk::FieldIdFacetIsEmptyDocids(_) => {
let span = tracing::trace_span!(target: "profile::indexing::write_db", "field_id_facet_is_empty_docids");
let span = tracing::trace_span!(target: "indexing::write_db", "field_id_facet_is_empty_docids");
let _entered = span.enter();
let mut builder = MergerBuilder::new(merge_deladd_cbo_roaring_bitmaps as MergeFn);


@@ -1032,6 +1032,13 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
{
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
let existing_fields: HashSet<_> = self
.index
.field_distribution(self.wtxn)?
.into_iter()
.filter_map(|(field, count)| (count != 0).then_some(field))
.collect();
let old_faceted_fields = self.index.user_defined_faceted_fields(self.wtxn)?;
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
@@ -1052,7 +1059,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// index new fields as facets. It means that the distinct attribute,
// an Asc/Desc criterion or a filtered attribute has been added or removed.
let new_faceted_fields = self.index.user_defined_faceted_fields(self.wtxn)?;
let faceted_updated = old_faceted_fields != new_faceted_fields;
let faceted_updated =
(&existing_fields - &old_faceted_fields) != (&existing_fields - &new_faceted_fields);
let stop_words_updated = self.update_stop_words()?;
let non_separator_tokens_updated = self.update_non_separator_tokens()?;
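A standalone restatement of the set comparison above (illustrative only; assumes plain `HashSet<String>` fields): a facet reindex is only needed when the faceted status of a field that exists in at least one document changes.

```rust
use std::collections::HashSet;

/// Illustrative restatement of the comparison above: settings only require a facet
/// reindex when the faceted status of an *existing* field changes.
fn faceted_fields_updated(
    existing_fields: &HashSet<String>,
    old_faceted_fields: &HashSet<String>,
    new_faceted_fields: &HashSet<String>,
) -> bool {
    (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
}
```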


@@ -59,8 +59,8 @@ pub enum EmbedErrorKind {
OpenAiAuth(OpenAiError),
#[error("sent too many requests to OpenAI: {0}")]
OpenAiTooManyRequests(OpenAiError),
#[error("received internal error from OpenAI: {0}")]
OpenAiInternalServerError(OpenAiError),
#[error("received internal error from OpenAI: {0:?}")]
OpenAiInternalServerError(Option<OpenAiError>),
#[error("sent too many tokens in a request to OpenAI: {0}")]
OpenAiTooManyTokens(OpenAiError),
#[error("received unhandled HTTP status code {0} from OpenAI")]
@@ -106,7 +106,7 @@ impl EmbedError {
Self { kind: EmbedErrorKind::OpenAiTooManyRequests(inner), fault: FaultSource::Runtime }
}
pub(crate) fn openai_internal_server_error(inner: OpenAiError) -> EmbedError {
pub(crate) fn openai_internal_server_error(inner: Option<OpenAiError>) -> EmbedError {
Self { kind: EmbedErrorKind::OpenAiInternalServerError(inner), fault: FaultSource::Runtime }
}


@@ -261,3 +261,7 @@ impl DistributionShift {
score
}
}
pub const fn is_cuda_enabled() -> bool {
cfg!(feature = "cuda")
}


@@ -178,6 +178,8 @@ impl Embedder {
retry.into_duration(attempt)
}
}?;
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
tracing::warn!(
"Attempt #{}, retrying after {}ms.",
attempt,
@@ -220,24 +222,12 @@ impl Embedder {
error_response.error,
)));
}
StatusCode::INTERNAL_SERVER_ERROR => {
let error_response: OpenAiErrorResponse = response
.json()
.await
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
StatusCode::INTERNAL_SERVER_ERROR
| StatusCode::BAD_GATEWAY
| StatusCode::SERVICE_UNAVAILABLE => {
let error_response: Result<OpenAiErrorResponse, _> = response.json().await;
return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
error_response.error,
)));
}
StatusCode::SERVICE_UNAVAILABLE => {
let error_response: OpenAiErrorResponse = response
.json()
.await
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
error_response.error,
error_response.ok().map(|error_response| error_response.error),
)));
}
StatusCode::BAD_REQUEST => {
@@ -248,14 +238,14 @@ impl Embedder {
.map_err(EmbedError::openai_unexpected)
.map_err(Retry::retry_later)?;
tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
tracing::warn!("OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
error_response.error,
)));
}
code => {
return Err(Retry::give_up(EmbedError::openai_unhandled_status_code(
return Err(Retry::retry_later(EmbedError::openai_unhandled_status_code(
code.as_u16(),
)));
}


@@ -1,4 +1,5 @@
use std::collections::{BTreeMap, HashMap};
use std::ops::Range;
use std::time::Duration;
use serde::{Deserialize, Serialize};
@@ -16,6 +17,51 @@ enum SpanStatus {
pub struct CallStats {
pub call_count: usize,
pub time: u64,
pub self_time: u64,
}
#[derive(Debug, Default)]
pub struct SelfTime {
child_ranges: Vec<Range<Duration>>,
}
impl SelfTime {
pub fn new() -> Self {
Default::default()
}
pub fn add_child_range(&mut self, child_range: Range<Duration>) {
self.child_ranges.push(child_range)
}
pub fn self_duration(&mut self, self_range: Range<Duration>) -> Duration {
if self.child_ranges.is_empty() {
return self_range.end - self_range.start;
}
// by sorting child ranges by their start time,
// we make sure that no child will start before the last one we visited.
self.child_ranges
.sort_by(|left, right| left.start.cmp(&right.start).then(left.end.cmp(&right.end)));
// self duration computed by adding all the segments where the span is not executing a child
let mut self_duration = Duration::from_nanos(0);
// last point in time where we are certain that this span was not executing a child.
let mut committed_point = self_range.start;
for child_range in &self.child_ranges {
if child_range.start > committed_point {
// we add to the self duration the gap between the end of the latest child and the beginning of the next child
self_duration += child_range.start - committed_point;
}
if committed_point < child_range.end {
// then we advance the committed point to the end of the latest child
committed_point = child_range.end;
}
}
self_duration
}
}
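A standalone mirror of the gap-summing rule above (illustrative only), with a concrete check: for a span running 0–10 ms with children at 2–4 ms and 6–10 ms, the self time is the 0–2 ms and 4–6 ms gaps, i.e. 4 ms.

```rust
use std::ops::Range;
use std::time::Duration;

/// Mirror of the gap-summing rule used by `SelfTime::self_duration` above:
/// sum the segments of the span not covered by any (possibly overlapping) child range.
fn self_duration(span: Range<Duration>, mut children: Vec<Range<Duration>>) -> Duration {
    if children.is_empty() {
        return span.end - span.start;
    }
    children.sort_by(|l, r| l.start.cmp(&r.start).then(l.end.cmp(&r.end)));
    let mut self_duration = Duration::ZERO;
    let mut committed_point = span.start;
    for child in &children {
        if child.start > committed_point {
            self_duration += child.start - committed_point;
        }
        if committed_point < child.end {
            committed_point = child.end;
        }
    }
    self_duration
}

fn main() {
    let ms = Duration::from_millis;
    // span 0..10ms with children 2..4ms and 6..10ms: self time is (0..2) + (4..6) = 4ms
    let d = self_duration(ms(0)..ms(10), vec![ms(2)..ms(4), ms(6)..ms(10)]);
    assert_eq!(d, ms(4));
}
```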
pub fn to_call_stats<R: std::io::Read>(
@@ -23,6 +69,9 @@ pub fn to_call_stats<R: std::io::Read>(
) -> Result<BTreeMap<String, CallStats>, Error> {
let mut calls = HashMap::new();
let mut spans = HashMap::new();
let mut last_point = Duration::from_nanos(0);
let mut first_point = None;
let mut total_self_time = SelfTime::new();
for entry in trace {
let entry = entry?;
match entry {
@@ -31,10 +80,11 @@ pub fn to_call_stats<R: std::io::Read>(
}
Entry::NewThread(_) => {}
Entry::NewSpan(span) => {
spans.insert(span.id, (span, SpanStatus::Outside));
spans.insert(span.id, (span, SpanStatus::Outside, SelfTime::new()));
}
Entry::SpanEnter(SpanEnter { id, time, memory: _ }) => {
let (_, status) = spans.get_mut(&id).unwrap();
first_point.get_or_insert(time);
let (_, status, _) = spans.get_mut(&id).unwrap();
let SpanStatus::Outside = status else {
continue;
@@ -43,18 +93,32 @@ pub fn to_call_stats<R: std::io::Read>(
*status = SpanStatus::Inside(time);
}
Entry::SpanExit(SpanExit { id, time: end, memory: _ }) => {
let (span, status) = spans.get_mut(&id).unwrap();
let (span, status, self_time) = spans.get_mut(&id).unwrap();
let SpanStatus::Inside(begin) = status else {
continue;
};
let begin = *begin;
if last_point < end {
last_point = end;
}
*status = SpanStatus::Outside;
let self_range = begin..end;
let self_duration = self_time.self_duration(self_range.clone());
*self_time = SelfTime::new();
let span = *span;
if let Some(parent_id) = span.parent_id {
let (_, _, parent_self_time) = spans.get_mut(&parent_id).unwrap();
parent_self_time.add_child_range(self_range.clone())
}
total_self_time.add_child_range(self_range);
let (_, call_list) = calls.get_mut(&span.call_id).unwrap();
call_list.push(end - begin);
call_list.push((end - begin, self_duration));
}
Entry::SpanClose(SpanClose { id, time: _ }) => {
spans.remove(&id);
@@ -63,17 +127,31 @@ pub fn to_call_stats<R: std::io::Read>(
}
}
let total_self_time = first_point
.map(|first_point| (first_point, total_self_time.self_duration(first_point..last_point)));
Ok(calls
.into_iter()
.map(|(_, (call_site, calls))| (site_to_string(call_site), calls_to_stats(calls)))
.chain(total_self_time.map(|(first_point, total_self_time)| {
(
"::meta::total".to_string(),
CallStats {
call_count: 1,
time: (last_point - first_point).as_nanos() as u64,
self_time: total_self_time.as_nanos() as u64,
},
)
}))
.collect())
}
fn site_to_string(call_site: NewCallsite) -> String {
format!("{}::{}", call_site.target, call_site.name)
}
fn calls_to_stats(calls: Vec<Duration>) -> CallStats {
fn calls_to_stats(calls: Vec<(Duration, Duration)>) -> CallStats {
let nb = calls.len();
let sum: Duration = calls.iter().sum();
CallStats { call_count: nb, time: sum.as_nanos() as u64 }
let sum: Duration = calls.iter().map(|(total, _)| total).sum();
let self_sum: Duration = calls.iter().map(|(_, self_duration)| self_duration).sum();
CallStats { call_count: nb, time: sum.as_nanos() as u64, self_time: self_sum.as_nanos() as u64 }
}

workloads/hackernews.json Normal file

@@ -0,0 +1,164 @@
{
"name": "hackernews.ndjson_1M",
"run_count": 3,
"extra_cli_args": [],
"assets": {
"hackernews-100_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-100_000.ndjson",
"sha256": "60ecd23485d560edbd90d9ca31f0e6dba1455422f2a44e402600fbb5f7f1b213"
},
"hackernews-200_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-200_000.ndjson",
"sha256": "785b0271fdb47cba574fab617d5d332276b835c05dd86e4a95251cf7892a1685"
},
"hackernews-300_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-300_000.ndjson",
"sha256": "de73c7154652eddfaf69cdc3b2f824d5c452f095f40a20a1c97bb1b5c4d80ab2"
},
"hackernews-400_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-400_000.ndjson",
"sha256": "c1b00a24689110f366447e434c201c086d6f456d54ed1c4995894102794d8fe7"
},
"hackernews-500_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-500_000.ndjson",
"sha256": "ae98f9dbef8193d750e3e2dbb6a91648941a1edca5f6e82c143e7996f4840083"
},
"hackernews-600_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-600_000.ndjson",
"sha256": "b495fdc72c4a944801f786400f22076ab99186bee9699f67cbab2f21f5b74dbe"
},
"hackernews-700_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-700_000.ndjson",
"sha256": "4b2c63974f3dabaa4954e3d4598b48324d03c522321ac05b0d583f36cb78a28b"
},
"hackernews-800_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-800_000.ndjson",
"sha256": "cb7b6afe0e6caa1be111be256821bc63b0771b2a0e1fad95af7aaeeffd7ba546"
},
"hackernews-900_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-900_000.ndjson",
"sha256": "e1154ddcd398f1c867758a93db5bcb21a07b9e55530c188a2917fdef332d3ba9"
},
"hackernews-1_000_000.ndjson": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/hackernews/hackernews-1_000_000.ndjson",
"sha256": "27e25efd0b68b159b8b21350d9af76938710cb29ce0393fa71b41c4f3c630ffe"
}
},
"commands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"displayedAttributes": [
"title",
"by",
"score",
"time"
],
"searchableAttributes": [
"title"
],
"filterableAttributes": [
"by"
],
"sortableAttributes": [
"score",
"time"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-100_000.ndjson"
},
"synchronous": "WaitForTask"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-200_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-300_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-400_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-500_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-600_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-700_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-800_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-900_000.ndjson"
},
"synchronous": "WaitForResponse"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "hackernews-1_000_000.ndjson"
},
"synchronous": "WaitForTask"
}
]
}


@@ -0,0 +1,44 @@
{
"name": "movies.json,no-threads",
"run_count": 2,
"extra_cli_args": [
"--max-indexing-threads=1"
],
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"synchronous": "WaitForTask"
}
]
}

workloads/movies.json Normal file

@@ -0,0 +1,42 @@
{
"name": "movies.json",
"run_count": 10,
"extra_cli_args": [],
"assets": {
"movies.json": {
"local_location": null,
"remote_location": "https://milli-benchmarks.fra1.digitaloceanspaces.com/bench/datasets/movies.json",
"sha256": "5b6e4cb660bc20327776e8a33ea197b43d9ec84856710ead1cc87ab24df77de1"
}
},
"commands": [
{
"route": "indexes/movies/settings",
"method": "PATCH",
"body": {
"inline": {
"searchableAttributes": [
"title",
"overview"
],
"filterableAttributes": [
"genres",
"release_date"
],
"sortableAttributes": [
"release_date"
]
}
},
"synchronous": "DontWait"
},
{
"route": "indexes/movies/documents",
"method": "POST",
"body": {
"asset": "movies.json"
},
"synchronous": "WaitForTask"
}
]
}


@@ -11,5 +11,34 @@ license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.79"
build-info = { version = "1.7.0", path = "../build-info" }
cargo_metadata = "0.18.1"
clap = { version = "4.4.14", features = ["derive"] }
futures-core = "0.3.30"
futures-util = "0.3.30"
reqwest = { version = "0.11.23", features = [
"stream",
"json",
"rustls-tls",
], default_features = false }
serde = { version = "1.0.195", features = ["derive"] }
serde_json = "1.0.111"
sha2 = "0.10.8"
sysinfo = "0.30.5"
time = { version = "0.3.32", features = [
"serde",
"serde-human-readable",
"macros",
] }
tokio = { version = "1.35.1", features = [
"rt",
"net",
"time",
"process",
"signal",
] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
tracing-trace = { version = "0.1.0", path = "../tracing-trace" }
uuid = { version = "1.7.0", features = ["v7", "serde"] }

xtask/src/bench/assets.rs Normal file

@@ -0,0 +1,250 @@
use std::collections::BTreeMap;
use std::io::{Read as _, Seek as _, Write as _};
use anyhow::{bail, Context};
use futures_util::TryStreamExt as _;
use serde::Deserialize;
use sha2::Digest;
use super::client::Client;
#[derive(Deserialize, Clone)]
pub struct Asset {
pub local_location: Option<String>,
pub remote_location: Option<String>,
#[serde(default)]
pub format: AssetFormat,
pub sha256: Option<String>,
}
#[derive(Deserialize, Default, Copy, Clone)]
pub enum AssetFormat {
#[default]
Auto,
Json,
NdJson,
Raw,
}
impl AssetFormat {
pub fn to_content_type(self, filename: &str) -> &'static str {
match self {
AssetFormat::Auto => Self::auto_detect(filename).to_content_type(filename),
AssetFormat::Json => "application/json",
AssetFormat::NdJson => "application/x-ndjson",
AssetFormat::Raw => "application/octet-stream",
}
}
fn auto_detect(filename: &str) -> Self {
let path = std::path::Path::new(filename);
match path.extension().and_then(|extension| extension.to_str()) {
Some(extension) if extension.eq_ignore_ascii_case("json") => Self::Json,
Some(extension) if extension.eq_ignore_ascii_case("ndjson") => Self::NdJson,
extension => {
tracing::warn!(asset = filename, ?extension, "asset has format `Auto`, but extension was not recognized. Specify `Raw` format to suppress this warning.");
AssetFormat::Raw
}
}
}
}
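A hedged usage sketch of the extension-based auto-detection above (assumes it sits in the same module as `AssetFormat`; the filenames are made up):

```rust
#[cfg(test)]
mod content_type_tests {
    use super::AssetFormat;

    #[test]
    fn auto_detects_by_extension() {
        assert_eq!(AssetFormat::Auto.to_content_type("movies.json"), "application/json");
        assert_eq!(AssetFormat::Auto.to_content_type("hackernews-100_000.ndjson"), "application/x-ndjson");
        // unrecognized extensions fall back to `Raw` (and log a warning)
        assert_eq!(AssetFormat::Auto.to_content_type("dump.tar.gz"), "application/octet-stream");
    }
}
```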
pub fn fetch_asset(
name: &str,
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<(std::fs::File, AssetFormat)> {
let asset =
assets.get(name).with_context(|| format!("could not find asset with name '{name}'"))?;
let filename = if let Some(local_filename) = &asset.local_location {
local_filename.clone()
} else {
format!("{asset_folder}/{name}")
};
Ok((
std::fs::File::open(&filename)
.with_context(|| format!("could not open asset '{name}' at '{filename}'"))?,
asset.format,
))
}
#[tracing::instrument(skip(client, assets), fields(asset_count = assets.len()))]
pub async fn fetch_assets(
client: &Client,
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<()> {
let mut download_tasks = tokio::task::JoinSet::new();
for (name, asset) in assets {
// trying local
if let Some(local) = &asset.local_location {
match std::fs::File::open(local) {
Ok(file) => {
if check_sha256(name, asset, file)? {
continue;
} else {
tracing::warn!(asset = name, file = local, "found local resource for asset but hash differed, skipping to asset store");
}
}
Err(error) => match error.kind() {
std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
}
_ => tracing::warn!(
error = &error as &dyn std::error::Error,
"error checking local resource, skipping to asset store"
),
},
}
}
// checking asset store
let store_filename = format!("{}/{}", asset_folder, name);
match std::fs::File::open(&store_filename) {
Ok(file) => {
if check_sha256(name, asset, file)? {
continue;
} else {
tracing::warn!(asset = name, file = store_filename, "found resource for asset in asset store, but hash differed, skipping to remote method");
}
}
Err(error) => match error.kind() {
std::io::ErrorKind::NotFound => { /* file does not exist, go to remote, no need for logs */
}
_ => tracing::warn!(
error = &error as &dyn std::error::Error,
"error checking resource in store, skipping to remote method"
),
},
}
// downloading remote
match &asset.remote_location {
Some(location) => {
std::fs::create_dir_all(asset_folder).with_context(|| format!("could not create asset folder at {asset_folder}"))?;
download_tasks.spawn({
let client = client.clone();
let name = name.to_string();
let location = location.to_string();
let store_filename = store_filename.clone();
let asset = asset.clone();
download_asset(client, name, asset, location, store_filename)});
},
None => bail!("asset {name} has no remote location, but was not found locally or in the asset store"),
}
}
while let Some(res) = download_tasks.join_next().await {
res.context("download task panicked")?.context("download task failed")?;
}
Ok(())
}
fn check_sha256(name: &str, asset: &Asset, mut file: std::fs::File) -> anyhow::Result<bool> {
let mut bytes = Vec::new();
file.read_to_end(&mut bytes).with_context(|| format!("hashing file for asset {name}"))?;
let mut file_hash = sha2::Sha256::new();
file_hash.update(&bytes);
let file_hash = file_hash.finalize();
let file_hash = format!("{:x}", file_hash);
tracing::debug!(hash = file_hash, "hashed local file");
Ok(match &asset.sha256 {
Some(hash) => {
tracing::debug!(hash, "hash from workload");
if hash.to_ascii_lowercase() == file_hash {
true
} else {
tracing::warn!(
file_hash,
asset_hash = hash.to_ascii_lowercase(),
"hashes don't match"
);
false
}
}
None => {
tracing::warn!(sha256 = file_hash, "Skipping hash for asset {name} that doesn't have one. Please add it to workload file");
true
}
})
}
#[tracing::instrument(skip(client, asset, name), fields(asset = name))]
async fn download_asset(
client: Client,
name: String,
asset: Asset,
src: String,
dest_filename: String,
) -> anyhow::Result<()> {
let context = || format!("failure downloading asset {name} from {src}");
let response = client.get(&src).send().await.with_context(context)?;
let file = std::fs::File::options()
.create(true)
.truncate(true)
.write(true)
.read(true)
.open(&dest_filename)
.with_context(|| format!("creating destination file {dest_filename}"))
.with_context(context)?;
let mut dest = std::io::BufWriter::new(
file.try_clone().context("cloning I/O handle").with_context(context)?,
);
let total_len: Option<u64> = response
.headers()
.get(reqwest::header::CONTENT_LENGTH)
.and_then(|value| value.to_str().ok())
.and_then(|value| value.parse().ok());
let progress = tokio::spawn({
let name = name.clone();
async move {
loop {
match file.metadata().context("could not get file metadata") {
Ok(metadata) => {
let len = metadata.len();
tracing::info!(
asset = name,
downloaded_bytes = len,
total_bytes = total_len,
"asset download in progress"
);
}
Err(error) => {
tracing::warn!(%error, "could not get file metadata");
}
}
tokio::time::sleep(std::time::Duration::from_secs(60)).await;
}
}
});
let writing_context = || format!("while writing to destination file at {dest_filename}");
let mut response = response.bytes_stream();
while let Some(bytes) =
response.try_next().await.context("while downloading file").with_context(context)?
{
dest.write_all(&bytes).with_context(writing_context).with_context(context)?;
}
progress.abort();
let mut file = dest.into_inner().with_context(writing_context).with_context(context)?;
file.rewind().context("while rewinding asset file")?;
if !check_sha256(&name, &asset, file)? {
bail!("asset '{name}': sha256 mismatch for file {dest_filename} downloaded from {src}")
}
Ok(())
}

xtask/src/bench/client.rs Normal file

@@ -0,0 +1,80 @@
use anyhow::Context;
use serde::Deserialize;
#[derive(Debug, Clone)]
pub struct Client {
base_url: Option<String>,
client: reqwest::Client,
}
impl Client {
pub fn new(
base_url: Option<String>,
api_key: Option<&str>,
timeout: Option<std::time::Duration>,
) -> anyhow::Result<Self> {
let mut headers = reqwest::header::HeaderMap::new();
if let Some(api_key) = api_key {
headers.append(
reqwest::header::AUTHORIZATION,
reqwest::header::HeaderValue::from_str(&format!("Bearer {api_key}"))
.context("Invalid authorization header")?,
);
}
let client = reqwest::ClientBuilder::new().default_headers(headers);
let client = if let Some(timeout) = timeout { client.timeout(timeout) } else { client };
let client = client.build()?;
Ok(Self { base_url, client })
}
pub fn request(&self, method: reqwest::Method, route: &str) -> reqwest::RequestBuilder {
if let Some(base_url) = &self.base_url {
if route.is_empty() {
self.client.request(method, base_url)
} else {
self.client.request(method, format!("{}/{}", base_url, route))
}
} else {
self.client.request(method, route)
}
}
pub fn get(&self, route: &str) -> reqwest::RequestBuilder {
self.request(reqwest::Method::GET, route)
}
pub fn put(&self, route: &str) -> reqwest::RequestBuilder {
self.request(reqwest::Method::PUT, route)
}
pub fn post(&self, route: &str) -> reqwest::RequestBuilder {
self.request(reqwest::Method::POST, route)
}
pub fn delete(&self, route: &str) -> reqwest::RequestBuilder {
self.request(reqwest::Method::DELETE, route)
}
}
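A possible call site for this client (hedged sketch: assumes it is used from inside the bench crate, and the URL, key and timeout are placeholders):

```rust
use std::time::Duration;

// Hypothetical call site; `Client` is the wrapper defined above.
async fn ping_meilisearch() -> anyhow::Result<()> {
    let client = Client::new(
        Some("http://127.0.0.1:7700".into()),
        Some("MASTER_KEY"),
        Some(Duration::from_secs(30)),
    )?;
    let response = client.get("health").send().await?;
    println!("health: {}", response.status());
    Ok(())
}
```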
#[derive(Debug, Clone, Copy, Deserialize)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
pub enum Method {
Get,
Post,
Patch,
Delete,
Put,
}
impl From<Method> for reqwest::Method {
fn from(value: Method) -> Self {
match value {
Method::Get => Self::GET,
Method::Post => Self::POST,
Method::Patch => Self::PATCH,
Method::Delete => Self::DELETE,
Method::Put => Self::PUT,
}
}
}

xtask/src/bench/command.rs Normal file

@@ -0,0 +1,194 @@
use std::collections::BTreeMap;
use std::fmt::Display;
use std::io::Read as _;
use anyhow::{bail, Context as _};
use serde::Deserialize;
use super::assets::{fetch_asset, Asset};
use super::client::{Client, Method};
#[derive(Clone, Deserialize)]
pub struct Command {
pub route: String,
pub method: Method,
#[serde(default)]
pub body: Body,
#[serde(default)]
pub synchronous: SyncMode,
}
#[derive(Default, Clone, Deserialize)]
#[serde(untagged)]
pub enum Body {
Inline {
inline: serde_json::Value,
},
Asset {
asset: String,
},
#[default]
Empty,
}
impl Body {
pub fn get(
self,
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<Option<(Vec<u8>, &'static str)>> {
Ok(match self {
Body::Inline { inline: body } => Some((
serde_json::to_vec(&body)
.context("serializing to bytes")
.context("while getting inline body")?,
"application/json",
)),
Body::Asset { asset: name } => Some({
let context = || format!("while getting body from asset '{name}'");
let (mut file, format) =
fetch_asset(&name, assets, asset_folder).with_context(context)?;
let mut buf = Vec::new();
file.read_to_end(&mut buf).with_context(context)?;
(buf, format.to_content_type(&name))
}),
Body::Empty => None,
})
}
}
impl Display for Command {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{:?} {} ({:?})", self.method, self.route, self.synchronous)
}
}
#[derive(Default, Debug, Clone, Copy, Deserialize)]
pub enum SyncMode {
DontWait,
#[default]
WaitForResponse,
WaitForTask,
}
pub async fn run_batch(
client: &Client,
batch: &[Command],
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<()> {
let [.., last] = batch else { return Ok(()) };
let sync = last.synchronous;
let mut tasks = tokio::task::JoinSet::new();
for command in batch {
// FIXME: you probably don't want to copy assets every time here
tasks.spawn({
let client = client.clone();
let command = command.clone();
let assets = assets.clone();
let asset_folder = asset_folder.to_owned();
async move { run(client, command, &assets, &asset_folder).await }
});
}
while let Some(result) = tasks.join_next().await {
result
.context("panicked while executing command")?
.context("error while executing command")?;
}
match sync {
SyncMode::DontWait => {}
SyncMode::WaitForResponse => {}
SyncMode::WaitForTask => wait_for_tasks(client).await?,
}
Ok(())
}
async fn wait_for_tasks(client: &Client) -> anyhow::Result<()> {
loop {
let response = client
.get("tasks?statuses=enqueued,processing")
.send()
.await
.context("could not wait for tasks")?;
let response: serde_json::Value = response
.json()
.await
.context("could not deserialize response to JSON")
.context("could not wait for tasks")?;
match response.get("total") {
Some(serde_json::Value::Number(number)) => {
let number = number.as_u64().with_context(|| {
format!("waiting for tasks: could not parse 'total' as integer, got {}", number)
})?;
if number == 0 {
break;
} else {
tokio::time::sleep(std::time::Duration::from_secs(1)).await;
continue;
}
}
Some(thing_else) => {
bail!(format!(
"waiting for tasks: could not parse 'total' as a number, got '{thing_else}'"
))
}
None => {
bail!(format!(
"waiting for tasks: expected response to contain 'total', got '{response}'"
))
}
}
}
Ok(())
}
#[tracing::instrument(skip(client, command, assets, asset_folder), fields(command = %command))]
pub async fn run(
client: Client,
mut command: Command,
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<()> {
// mem::take the body here to leave an empty body in its place, so that `command` is not partially moved out
let body = std::mem::take(&mut command.body)
.get(assets, asset_folder)
.with_context(|| format!("while getting body for command {command}"))?;
let request = client.request(command.method.into(), &command.route);
let request = if let Some((body, content_type)) = body {
request.body(body).header(reqwest::header::CONTENT_TYPE, content_type)
} else {
request
};
let response =
request.send().await.with_context(|| format!("error sending command: {}", command))?;
let code = response.status();
if code.is_client_error() {
tracing::error!(%command, %code, "error in workload file");
let response: serde_json::Value = response
.json()
.await
.context("could not deserialize response as JSON")
.context("parsing error in workload file when sending command")?;
bail!("error in workload file: server responded with error code {code} and '{response}'")
} else if code.is_server_error() {
tracing::error!(%command, %code, "server error");
let response: serde_json::Value = response
.json()
.await
.context("could not deserialize response as JSON")
.context("parsing server error when sending command")?;
bail!("server error: server responded with error code {code} and '{response}'")
}
Ok(())
}


@@ -0,0 +1,167 @@
use std::collections::BTreeMap;
use anyhow::{bail, Context};
use serde_json::json;
use tokio::signal::ctrl_c;
use tokio::task::AbortHandle;
use tracing_trace::processor::span_stats::CallStats;
use uuid::Uuid;
use super::client::Client;
use super::env_info;
use super::workload::Workload;
pub async fn cancel_on_ctrl_c(
invocation_uuid: Uuid,
dashboard_client: Client,
abort_handle: AbortHandle,
) {
tracing::info!("press Ctrl-C to cancel the invocation");
match ctrl_c().await {
Ok(()) => {
tracing::info!(%invocation_uuid, "received Ctrl-C, cancelling invocation");
mark_as_failed(dashboard_client, invocation_uuid, None).await;
abort_handle.abort();
}
Err(error) => tracing::warn!(
error = &error as &dyn std::error::Error,
"failed to listen to Ctrl-C signal, invocation won't be canceled on Ctrl-C"
),
}
}
pub async fn mark_as_failed(
dashboard_client: Client,
invocation_uuid: Uuid,
failure_reason: Option<String>,
) {
let response = dashboard_client
.post("cancel-invocation")
.json(&json!({
"invocation_uuid": invocation_uuid,
"failure_reason": failure_reason,
}))
.send()
.await;
let response = match response {
Ok(response) => response,
Err(response_error) => {
tracing::error!(error = &response_error as &dyn std::error::Error, %invocation_uuid, "could not mark invocation as failed");
return;
}
};
if !response.status().is_success() {
tracing::error!(
%invocation_uuid,
"could not mark invocation as failed: {}",
response.text().await.unwrap()
);
return;
}
tracing::warn!(%invocation_uuid, "marked invocation as failed or canceled");
}
pub async fn send_machine_info(
dashboard_client: &Client,
env: &env_info::Environment,
) -> anyhow::Result<()> {
let response = dashboard_client
.put("machine")
.json(&json!({"hostname": env.hostname}))
.send()
.await
.context("sending machine information")?;
if !response.status().is_success() {
bail!(
"could not send machine information: {} {}",
response.status(),
response.text().await.unwrap_or_else(|_| "unknown".into())
);
}
Ok(())
}
pub async fn create_invocation(
dashboard_client: &Client,
build_info: build_info::BuildInfo,
commit_message: &str,
env: env_info::Environment,
max_workloads: usize,
reason: Option<&str>,
) -> anyhow::Result<Uuid> {
let response = dashboard_client
.put("invocation")
.json(&json!({
"commit": {
"sha1": build_info.commit_sha1,
"message": commit_message,
"commit_date": build_info.commit_timestamp,
"branch": build_info.branch,
"tag": build_info.describe.and_then(|describe| describe.as_tag()),
},
"machine_hostname": env.hostname,
"max_workloads": max_workloads,
"reason": reason
}))
.send()
.await
.context("sending invocation")?;
if !response.status().is_success() {
bail!(
"could not send new invocation: {}",
response.text().await.unwrap_or_else(|_| "unknown".into())
);
}
let invocation_uuid: Uuid =
response.json().await.context("could not deserialize invocation response as JSON")?;
Ok(invocation_uuid)
}
pub async fn create_workload(
dashboard_client: &Client,
invocation_uuid: Uuid,
workload: &Workload,
) -> anyhow::Result<Uuid> {
let response = dashboard_client
.put("workload")
.json(&json!({
"invocation_uuid": invocation_uuid,
"name": &workload.name,
"max_runs": workload.run_count,
}))
.send()
.await
.context("could not create new workload")?;
if !response.status().is_success() {
bail!("creating new workload failed: {}", response.text().await.unwrap())
}
let workload_uuid: Uuid =
response.json().await.context("could not deserialize JSON as UUID")?;
Ok(workload_uuid)
}
pub async fn create_run(
dashboard_client: Client,
workload_uuid: Uuid,
report: &BTreeMap<String, CallStats>,
) -> anyhow::Result<()> {
let response = dashboard_client
.put("run")
.json(&json!({
"workload_uuid": workload_uuid,
"data": report
}))
.send()
.await
.context("sending new run")?;
if !response.status().is_success() {
bail!(
"sending new run failed: {}",
response.text().await.unwrap_or_else(|_| "unknown".into())
)
}
Ok(())
}


@@ -0,0 +1,75 @@
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Environment {
pub hostname: Option<String>,
pub cpu: String,
/// Advertised or nominal clock speed in Hertz.
pub clock_speed: u64,
/// Total number of bytes of memory provided by the system.
pub memory: u64,
pub os_type: String,
pub software: Vec<VersionInfo>,
pub user_name: String,
/// Set to `true` when the data was gathered by a manual run,
/// possibly on a developer machine, instead of the usual benchmark server.
pub manual_run: bool,
}
impl Environment {
pub fn generate_from_current_config() -> Self {
use sysinfo::System;
let unknown_string = String::from("Unknown");
let mut system = System::new();
system.refresh_cpu();
system.refresh_cpu_frequency();
system.refresh_memory();
let (cpu, frequency) = match system.cpus().first() {
Some(cpu) => (
format!("{} @ {:.2}GHz", cpu.brand(), cpu.frequency() as f64 / 1000.0),
cpu.frequency() * 1_000_000,
),
None => (unknown_string.clone(), 0),
};
let mut software = Vec::new();
if let Some(distribution) = System::name() {
software
.push(VersionInfo { name: distribution, version: String::from("distribution") });
}
if let Some(kernel) = System::kernel_version() {
software.push(VersionInfo { name: kernel, version: String::from("kernel") });
}
if let Some(os) = System::os_version() {
software.push(VersionInfo { name: os, version: String::from("kernel-release") });
}
if let Some(arch) = System::cpu_arch() {
software.push(VersionInfo { name: arch, version: String::from("arch") });
}
Self {
hostname: System::host_name(),
cpu,
clock_speed: frequency,
memory: system.total_memory(),
os_type: System::long_os_version().unwrap_or(unknown_string.clone()),
user_name: System::name().unwrap_or(unknown_string.clone()),
manual_run: false,
software,
}
}
}
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct VersionInfo {
pub name: String,
pub version: String,
}


@@ -0,0 +1,112 @@
use std::collections::BTreeMap;
use anyhow::{bail, Context as _};
use super::assets::Asset;
use super::client::Client;
use super::workload::Workload;
pub async fn kill(mut meilisearch: tokio::process::Child) {
if let Err(error) = meilisearch.kill().await {
tracing::warn!(
error = &error as &dyn std::error::Error,
"while terminating Meilisearch server"
)
}
}
#[tracing::instrument]
pub async fn build() -> anyhow::Result<()> {
let mut command = tokio::process::Command::new("cargo");
command.arg("build").arg("--release").arg("-p").arg("meilisearch");
command.kill_on_drop(true);
let mut builder = command.spawn().context("error building Meilisearch")?;
if !builder.wait().await.context("could not build Meilisearch")?.success() {
bail!("failed building Meilisearch")
}
Ok(())
}
#[tracing::instrument(skip(client, master_key, workload), fields(workload = workload.name))]
pub async fn start(
client: &Client,
master_key: Option<&str>,
workload: &Workload,
asset_folder: &str,
) -> anyhow::Result<tokio::process::Child> {
let mut command = tokio::process::Command::new("cargo");
command
.arg("run")
.arg("--release")
.arg("-p")
.arg("meilisearch")
.arg("--bin")
.arg("meilisearch")
.arg("--");
command.arg("--db-path").arg("./_xtask_benchmark.ms");
if let Some(master_key) = master_key {
command.arg("--master-key").arg(master_key);
}
command.arg("--experimental-enable-logs-route");
for extra_arg in workload.extra_cli_args.iter() {
command.arg(extra_arg);
}
command.kill_on_drop(true);
let mut meilisearch = command.spawn().context("Error starting Meilisearch")?;
wait_for_health(client, &mut meilisearch, &workload.assets, asset_folder).await?;
Ok(meilisearch)
}
async fn wait_for_health(
client: &Client,
meilisearch: &mut tokio::process::Child,
assets: &BTreeMap<String, Asset>,
asset_folder: &str,
) -> anyhow::Result<()> {
for i in 0..100 {
let res = super::command::run(client.clone(), health_command(), assets, asset_folder).await;
if res.is_ok() {
// check that this is actually the current Meilisearch instance that answered us
if let Some(exit_code) =
meilisearch.try_wait().context("cannot check Meilisearch server process status")?
{
tracing::error!("Got an health response from a different process");
bail!("Meilisearch server exited early with code {exit_code}");
}
return Ok(());
}
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
// check whether the Meilisearch instance exited early (cut the wait)
if let Some(exit_code) =
meilisearch.try_wait().context("cannot check Meilisearch server process status")?
{
bail!("Meilisearch server exited early with code {exit_code}");
}
tracing::debug!(attempt = i, "Waiting for Meilisearch to come up");
}
bail!("meilisearch is not responding")
}
fn health_command() -> super::command::Command {
super::command::Command {
route: "/health".into(),
method: super::client::Method::Get,
body: Default::default(),
synchronous: super::command::SyncMode::WaitForResponse,
}
}
pub fn delete_db() {
let _ = std::fs::remove_dir_all("./_xtask_benchmark.ms");
}
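Put together, `start` above spawns roughly the following command; `<extra_cli_args...>` stands for the workload's `extra_cli_args`, and the `--master-key` flag is only added when a master key is configured:

```
cargo run --release -p meilisearch --bin meilisearch -- \
  --db-path ./_xtask_benchmark.ms \
  --master-key "$MASTER_KEY" \
  --experimental-enable-logs-route \
  <extra_cli_args...>
```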

203
xtask/src/bench/mod.rs Normal file
View File

@@ -0,0 +1,203 @@
mod assets;
mod client;
mod command;
mod dashboard;
mod env_info;
mod meili_process;
mod workload;
use std::path::PathBuf;
use anyhow::Context;
use clap::Parser;
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::Layer;
use self::client::Client;
use self::workload::Workload;
pub fn default_http_addr() -> String {
"127.0.0.1:7700".to_string()
}
pub fn default_report_folder() -> String {
"./bench/reports/".into()
}
pub fn default_asset_folder() -> String {
"./bench/assets/".into()
}
pub fn default_log_filter() -> String {
"info".into()
}
pub fn default_dashboard_url() -> String {
"http://localhost:9001".into()
}
/// Run benchmarks from a workload
#[derive(Parser, Debug)]
pub struct BenchDeriveArgs {
/// Filename of the workload file, pass multiple filenames
/// to run multiple workloads in the specified order.
///
/// Each workload run will get its own report file.
#[arg(value_name = "WORKLOAD_FILE", last = false)]
workload_file: Vec<PathBuf>,
/// URL of the dashboard.
#[arg(long, default_value_t = default_dashboard_url())]
dashboard_url: String,
/// Directory to output reports.
#[arg(long, default_value_t = default_report_folder())]
report_folder: String,
/// Directory to store the remote assets.
#[arg(long, default_value_t = default_asset_folder())]
asset_folder: String,
/// Log directives
#[arg(short, long, default_value_t = default_log_filter())]
log_filter: String,
/// Benchmark dashboard API key
#[arg(long)]
api_key: Option<String>,
/// Meilisearch master key
#[arg(long)]
master_key: Option<String>,
/// Authentication bearer for fetching assets
#[arg(long)]
assets_key: Option<String>,
/// Reason for the benchmark invocation
#[arg(short, long)]
reason: Option<String>,
}
pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
// setup logs
let filter: tracing_subscriber::filter::Targets =
args.log_filter.parse().context("invalid --log-filter")?;
let subscriber = tracing_subscriber::registry().with(
tracing_subscriber::fmt::layer()
.with_span_events(FmtSpan::NEW | FmtSpan::CLOSE)
.with_filter(filter),
);
tracing::subscriber::set_global_default(subscriber).context("could not setup logging")?;
// fetch environment and build info
let env = env_info::Environment::generate_from_current_config();
let build_info = build_info::BuildInfo::from_build();
// tokio runtime
let rt = tokio::runtime::Builder::new_current_thread().enable_io().enable_time().build()?;
let _scope = rt.enter();
// setup clients
let assets_client =
Client::new(None, args.assets_key.as_deref(), Some(std::time::Duration::from_secs(3600)))?; // 1h
let dashboard_client = Client::new(
Some(format!("{}/api/v1", args.dashboard_url)),
args.api_key.as_deref(),
Some(std::time::Duration::from_secs(60)),
)?;
// reporting uses its own client because keeping the stream open to wait for entries
// blocks any other requests
// Also, no timeout is set because we don't know how long it will take to recover the full trace
let logs_client = Client::new(
Some("http://127.0.0.1:7700/logs/stream".into()),
args.master_key.as_deref(),
None,
)?;
let meili_client = Client::new(
Some("http://127.0.0.1:7700".into()),
args.master_key.as_deref(),
Some(std::time::Duration::from_secs(60)),
)?;
// enter runtime
rt.block_on(async {
dashboard::send_machine_info(&dashboard_client, &env).await?;
let commit_message = build_info.commit_msg.context("missing commit message")?.split('\n').next().unwrap();
let max_workloads = args.workload_file.len();
let reason: Option<&str> = args.reason.as_deref();
let invocation_uuid = dashboard::create_invocation(&dashboard_client, build_info, commit_message, env, max_workloads, reason).await?;
tracing::info!(workload_count = args.workload_file.len(), "handling workload files");
// main task
let workload_runs = tokio::spawn({
let dashboard_client = dashboard_client.clone();
async move {
for workload_file in args.workload_file.iter() {
let workload: Workload = serde_json::from_reader(
std::fs::File::open(workload_file)
.with_context(|| format!("error opening {}", workload_file.display()))?,
)
.with_context(|| format!("error parsing {} as JSON", workload_file.display()))?;
workload::execute(
&assets_client,
&dashboard_client,
&logs_client,
&meili_client,
invocation_uuid,
args.master_key.as_deref(),
workload,
&args,
)
.await?;
}
Ok::<(), anyhow::Error>(())
}
});
// handle ctrl-c
let abort_handle = workload_runs.abort_handle();
tokio::spawn({
let dashboard_client = dashboard_client.clone();
dashboard::cancel_on_ctrl_c(invocation_uuid, dashboard_client, abort_handle)
});
// wait for the end of the main task, handle result
match workload_runs.await {
Ok(Ok(_)) => {
tracing::info!("Success");
Ok::<(), anyhow::Error>(())
}
Ok(Err(error)) => {
tracing::error!(%invocation_uuid, error = %error, "invocation failed, attempting to report the failure to dashboard");
dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some(error.to_string())).await;
tracing::warn!(%invocation_uuid, "invocation marked as failed following error");
Err(error)
},
Err(join_error) => {
match join_error.try_into_panic() {
Ok(panic) => {
tracing::error!("invocation panicked, attempting to report the failure to dashboard");
dashboard::mark_as_failed(dashboard_client, invocation_uuid, Some("Panicked".into())).await;
std::panic::resume_unwind(panic)
}
Err(_) => {
tracing::warn!("task was canceled");
Ok(())
}
}
},
}
})?;
Ok(())
}
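As a usage sketch, a benchmark invocation with these arguments could look like the one below. The workload file name is hypothetical, and whether an extra `--` separator is needed depends on how the `xtask` cargo alias is defined in the workspace, which is not part of this diff:

```
cargo xtask bench -- \
  --dashboard-url http://localhost:9001 \
  --api-key "$BENCH_DASHBOARD_API_KEY" \
  --master-key "$MEILI_MASTER_KEY" \
  --reason "local run, checking indexing changes" \
  workloads/my-workload.json
```

Each workload run gets its own report file under `--report-folder` (default `./bench/reports/`), as documented on `workload_file` above.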

262
xtask/src/bench/workload.rs Normal file
View File

@@ -0,0 +1,262 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{Seek as _, Write as _};
use anyhow::{bail, Context as _};
use futures_util::TryStreamExt as _;
use serde::Deserialize;
use serde_json::json;
use tokio::task::JoinHandle;
use uuid::Uuid;
use super::assets::Asset;
use super::client::Client;
use super::command::SyncMode;
use super::BenchDeriveArgs;
use crate::bench::{assets, dashboard, meili_process};
#[derive(Deserialize)]
pub struct Workload {
pub name: String,
pub run_count: u16,
pub extra_cli_args: Vec<String>,
pub assets: BTreeMap<String, Asset>,
pub commands: Vec<super::command::Command>,
}
async fn run_commands(
dashboard_client: &Client,
logs_client: &Client,
meili_client: &Client,
workload_uuid: Uuid,
workload: &Workload,
args: &BenchDeriveArgs,
run_number: u16,
) -> anyhow::Result<JoinHandle<anyhow::Result<File>>> {
let report_folder = &args.report_folder;
let workload_name = &workload.name;
std::fs::create_dir_all(report_folder)
.with_context(|| format!("could not create report directory at {report_folder}"))?;
let trace_filename = format!("{report_folder}/{workload_name}-{run_number}-trace.json");
let report_filename = format!("{report_folder}/{workload_name}-{run_number}-report.json");
let report_handle = start_report(logs_client, trace_filename).await?;
for batch in workload
.commands
.as_slice()
.split_inclusive(|command| !matches!(command.synchronous, SyncMode::DontWait))
{
super::command::run_batch(meili_client, batch, &workload.assets, &args.asset_folder)
.await?;
}
let processor =
stop_report(dashboard_client, logs_client, workload_uuid, report_filename, report_handle)
.await?;
Ok(processor)
}
#[allow(clippy::too_many_arguments)] // not the best code quality, but this is a benchmark runner
#[tracing::instrument(skip(assets_client, dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = workload.name))]
pub async fn execute(
assets_client: &Client,
dashboard_client: &Client,
logs_client: &Client,
meili_client: &Client,
invocation_uuid: Uuid,
master_key: Option<&str>,
workload: Workload,
args: &BenchDeriveArgs,
) -> anyhow::Result<()> {
assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
let workload_uuid =
dashboard::create_workload(dashboard_client, invocation_uuid, &workload).await?;
let mut tasks = Vec::new();
for i in 0..workload.run_count {
tasks.push(
execute_run(
dashboard_client,
logs_client,
meili_client,
workload_uuid,
master_key,
&workload,
args,
i,
)
.await?,
);
}
let mut reports = Vec::with_capacity(workload.run_count as usize);
for task in tasks {
reports.push(
task.await
.context("task panicked while processing report")?
.context("task failed while processing report")?,
);
}
tracing::info!(workload = workload.name, "Successful workload");
Ok(())
}
#[allow(clippy::too_many_arguments)] // not the best code quality, but this is a benchmark runner
#[tracing::instrument(skip(dashboard_client, logs_client, meili_client, workload, master_key, args), fields(workload = %workload.name))]
async fn execute_run(
dashboard_client: &Client,
logs_client: &Client,
meili_client: &Client,
workload_uuid: Uuid,
master_key: Option<&str>,
workload: &Workload,
args: &BenchDeriveArgs,
run_number: u16,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
meili_process::delete_db();
meili_process::build().await?;
let meilisearch =
meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?;
let processor = run_commands(
dashboard_client,
logs_client,
meili_client,
workload_uuid,
workload,
args,
run_number,
)
.await?;
meili_process::kill(meilisearch).await;
tracing::info!(run_number, "Successful run");
Ok(processor)
}
async fn start_report(
logs_client: &Client,
filename: String,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
let report_file = std::fs::File::options()
.create(true)
.truncate(true)
.write(true)
.read(true)
.open(&filename)
.with_context(|| format!("could not create file at {filename}"))?;
let mut report_file = std::io::BufWriter::new(report_file);
let response = logs_client
.post("")
.json(&json!({
"mode": "profile",
"target": "indexing::=trace"
}))
.send()
.await
.context("failed to start report")?;
let code = response.status();
if code.is_client_error() {
tracing::error!(%code, "request error when trying to start report");
let response: serde_json::Value = response
.json()
.await
.context("could not deserialize response as JSON")
.context("response error when trying to start report")?;
bail!(
"request error when trying to start report: server responded with error code {code} and '{response}'"
)
} else if code.is_server_error() {
tracing::error!(%code, "server error when trying to start report");
let response: serde_json::Value = response
.json()
.await
.context("could not deserialize response as JSON")
.context("response error trying to start report")?;
bail!("server error when trying to start report: server responded with error code {code} and '{response}'")
}
Ok(tokio::task::spawn(async move {
let mut stream = response.bytes_stream();
while let Some(bytes) = stream.try_next().await.context("while waiting for report")? {
report_file
.write_all(&bytes)
.with_context(|| format!("while writing report to {filename}"))?;
}
report_file.into_inner().with_context(|| format!("while writing report to {filename}"))
}))
}
async fn stop_report(
dashboard_client: &Client,
logs_client: &Client,
workload_uuid: Uuid,
filename: String,
report_handle: tokio::task::JoinHandle<anyhow::Result<std::fs::File>>,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
let response = logs_client.delete("").send().await.context("while stopping report")?;
if !response.status().is_success() {
bail!("received HTTP {} while stopping report", response.status())
}
let mut file = tokio::time::timeout(std::time::Duration::from_secs(1000), report_handle)
.await
.context("while waiting for the end of the report")?
.context("report writing task panicked")?
.context("while writing report")?;
file.rewind().context("while rewinding report file")?;
let process_handle = tokio::task::spawn({
let dashboard_client = dashboard_client.clone();
async move {
let span = tracing::info_span!("processing trace to report", filename);
let _guard = span.enter();
let report = tracing_trace::processor::span_stats::to_call_stats(
tracing_trace::TraceReader::new(std::io::BufReader::new(file)),
)
.context("could not convert trace to report")?;
let context = || format!("writing report to {filename}");
dashboard::create_run(dashboard_client, workload_uuid, &report).await?;
let mut output_file = std::io::BufWriter::new(
std::fs::File::options()
.create(true)
.truncate(true)
.write(true)
.read(true)
.open(&filename)
.with_context(context)?,
);
for (key, value) in report {
serde_json::to_writer(&mut output_file, &json!({key: value}))
.context("serializing span stat")?;
writeln!(&mut output_file).with_context(context)?;
}
output_file.flush().with_context(context)?;
let mut output_file = output_file.into_inner().with_context(context)?;
output_file.rewind().context("could not rewind output_file").with_context(context)?;
Ok(output_file)
}
});
Ok(process_handle)
}
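The batching in `run_commands` above hinges on `split_inclusive`: every command whose `synchronous` mode is not `DontWait` closes the current batch and is included in it, so a run of fire-and-forget commands travels together with the first command that has to wait. A minimal, self-contained sketch of that grouping (the real `SyncMode` lives in `command.rs` and may have more variants; only two are needed here):

```rust
#[derive(Debug)]
enum SyncMode {
    DontWait,
    WaitForResponse,
}

fn main() {
    // Hypothetical command sequence for a workload.
    let commands = [
        SyncMode::DontWait,
        SyncMode::WaitForResponse,
        SyncMode::DontWait,
        SyncMode::DontWait,
        SyncMode::WaitForResponse,
    ];

    // A batch ends at (and includes) the first command that is not `DontWait`.
    let batches: Vec<&[SyncMode]> = commands
        .split_inclusive(|mode| !matches!(mode, SyncMode::DontWait))
        .collect();

    assert_eq!(batches.len(), 2);
    assert_eq!(batches[0].len(), 2); // [DontWait, WaitForResponse]
    assert_eq!(batches[1].len(), 3); // [DontWait, DontWait, WaitForResponse]
    println!("{batches:?}");
}
```

As the variant names suggest, the closing command of each batch is the one whose response (or task) the runner waits on before moving to the next batch.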

1
xtask/src/lib.rs Normal file
View File

@@ -0,0 +1 @@
pub mod bench;

xtask/src/main.rs
View File

@@ -1,6 +1,7 @@
use std::collections::HashSet;
use clap::Parser;
use xtask::bench::BenchDeriveArgs;
/// List features available in the workspace
#[derive(Parser, Debug)]
@@ -17,13 +18,16 @@ struct ListFeaturesDeriveArgs {
#[command(bin_name = "cargo xtask")]
enum Command {
ListFeatures(ListFeaturesDeriveArgs),
Bench(BenchDeriveArgs),
}
fn main() {
fn main() -> anyhow::Result<()> {
let args = Command::parse();
match args {
Command::ListFeatures(args) => list_features(args),
Command::Bench(args) => xtask::bench::run(args)?,
}
Ok(())
}
fn list_features(args: ListFeaturesDeriveArgs) {