Use Language allow list in the highlighter

Change indexing threshold
Fix clippy errors
2025-07-21 22:00:59 +00:00 · 2023-03-08 12:44:16 +01:00 · 2023-03-08 12:36:04 +01:00 · 2023-03-08 10:53:42 +01:00 · 2023-03-07 19:38:01 +01:00 · 2023-03-07 18:35:26 +01:00
35 changed files with 439 additions and 1682 deletions
--- a/.github/workflows/create-issue-dependencies.yml
+++ b/.github/workflows/create-issue-dependencies.yml
@ -0,0 +1,28 @@
+name: Create issue to upgrade dependencies
+on:
+  schedule:
+    # Run on the first day of the month, every 3 months
+    - cron: '0 0 1 */3 *'
+  workflow_dispatch:
+
+jobs:
+  create-issue:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v3
+    - name: Create an issue
+      uses: actions-ecosystem/action-create-issue@v1
+      with:
+        github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
+        title: Upgrade dependencies
+        body: |
+          This issue is about updating Meilisearch dependencies:
+          - [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
+          - [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)
+
+          ⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!
+
+          The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
+        labels: |
+          dependencies
+          maintenance
--- a/.github/workflows/dependency-issue.yml
+++ b/.github/workflows/dependency-issue.yml
@ -1,24 +0,0 @@
-name: Create issue to upgrade dependencies
-
-on:
-  schedule:
-    # Run on the first day of the month, every 3 months
-    - cron: '0 0 1 */3 *'
-  workflow_dispatch:
-
-jobs:
-  create-issue:
-    runs-on: ubuntu-latest
-    env:
-      ISSUE_TEMPLATE: issue-template.md
-      GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
-    steps:
-    - uses: actions/checkout@v3
-    - name: Download the issue template
-      run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
-    - name: Create issue
-      run: |
-        gh issue create \
-          --title 'Upgrade dependencies' \
-          --label 'dependencies,maintenance' \
-          --body-file $ISSUE_TEMPLATE
--- a/.github/workflows/flaky-tests.yml
+++ b/.github/workflows/flaky-tests.yml
--- a/.github/workflows/manual_benchmarks.yml
+++ b/.github/workflows/manual_benchmarks.yml
@ -1,4 +1,4 @@
-name: Benchmarks (manual)
+name: Benchmarks

 on:
  workflow_dispatch:
--- a/.github/workflows/publish-binaries.yml
+++ b/.github/workflows/publish-binaries.yml
@ -1,5 +1,3 @@
-name: Publish binaries to GitHub release
-
 on:
  workflow_dispatch:
  schedule:
@ -7,6 +5,8 @@ on:
  release:
    types: [published]

+name: Publish binaries to release
+
 jobs:
  check-version:
    name: Check the version validity
@ -54,7 +54,7 @@ jobs:
    # No need to upload binaries for dry run (cron)
    - name: Upload binaries to release
      if: github.event_name == 'release'
-      uses: svenstaro/upload-release-action@2.5.0
+      uses: svenstaro/upload-release-action@2.4.0
      with:
        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/release/meilisearch
@ -87,7 +87,7 @@ jobs:
    # No need to upload binaries for dry run (cron)
    - name: Upload binaries to release
      if: github.event_name == 'release'
-      uses: svenstaro/upload-release-action@2.5.0
+      uses: svenstaro/upload-release-action@2.4.0
      with:
        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/release/${{ matrix.artifact_name }}
@ -123,7 +123,7 @@ jobs:
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.5.0
+        uses: svenstaro/upload-release-action@2.4.0
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
@ -183,7 +183,7 @@ jobs:
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.5.0
+        uses: svenstaro/upload-release-action@2.4.0
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
--- a/.github/workflows/publish-deb-brew-pkg.yml
+++ b/.github/workflows/publish-deb-brew-pkg.yml
@ -1,4 +1,4 @@
-name: Publish to APT & Homebrew
+name: Publish to APT repository & Homebrew

 on:
  release:
@ -35,7 +35,7 @@ jobs:
    - name: Build deb package
      run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
    - name: Upload debian pkg to release
-      uses: svenstaro/upload-release-action@2.5.0
+      uses: svenstaro/upload-release-action@2.4.0
      with:
        repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
        file: target/debian/meilisearch.deb
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@ -1,5 +1,4 @@
-name: Publish images to Docker Hub
-
+---
 on:
  push:
    # Will run for every tag pushed except `latest`
@ -13,6 +12,8 @@ on:
    - cron: '0 23 * * *' # Every day at 11:00pm
  workflow_dispatch:

+name: Publish tagged images to Docker Hub
+
 jobs:
  docker:
    runs-on: docker
--- a/.github/workflows/push_benchmarks_indexing.yml
+++ b/.github/workflows/push_benchmarks_indexing.yml
@ -1,4 +1,4 @@
-name: Benchmarks of indexing (push)
+name: Benchmarks indexing (push)

 on:
  push:
--- a/.github/workflows/push_benchmarks_search_geo.yml
+++ b/.github/workflows/push_benchmarks_search_geo.yml
@ -1,4 +1,4 @@
-name: Benchmarks of search for geo (push)
+name: Benchmarks search geo (push)

 on:
  push:
--- a/.github/workflows/push_benchmarks_search_songs.yml
+++ b/.github/workflows/push_benchmarks_search_songs.yml
@ -1,4 +1,4 @@
-name: Benchmarks of search for songs (push)
+name: Benchmarks search songs (push)

 on:
  push:
--- a/.github/workflows/push_benchmarks_search_wiki.yml
+++ b/.github/workflows/push_benchmarks_search_wiki.yml
@ -1,4 +1,4 @@
-name: Benchmarks of search for Wikipedia articles (push)
+name: Benchmarks search wikipedia articles (push)

 on:
  push:
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@ -1,4 +1,4 @@
-name: Test suite
+name: Rust

 on:
  workflow_dispatch:
@ -25,35 +25,36 @@ jobs:
      # Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
      image: ubuntu:18.04
    steps:
-      - uses: actions/checkout@v3
-      - name: Install needed dependencies
-        run: |
-          apt-get update && apt-get install -y curl
-          apt-get install build-essential -y
-      - name: Run test with Rust stable
-        if: github.event_name != 'schedule'
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
-      - name: Run test with Rust nightly
-        if: github.event_name == 'schedule'
-        uses: actions-rs/toolchain@v1
-        with:
-          toolchain: nightly
-          override: true
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
-      - name: Run cargo check without any default features
-        uses: actions-rs/cargo@v1
-        with:
-          command: build
-          args: --locked --release --no-default-features --all
-      - name: Run cargo test
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --locked --release --all
+    - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
+    - name: Run test with Rust stable
+      if: github.event_name != 'schedule'
+      uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - name: Run test with Rust nightly
+      if: github.event_name == 'schedule'
+      uses: actions-rs/toolchain@v1
+      with:
+        toolchain: nightly
+        override: true
+    # Disable cache due to disk space issues with Windows workers in CI
+    # - name: Cache dependencies
+    #   uses: Swatinem/rust-cache@v2.2.0
+    - name: Run cargo check without any default features
+      uses: actions-rs/cargo@v1
+      with:
+        command: build
+        args: --locked --release --no-default-features --all
+    - name: Run cargo test
+      uses: actions-rs/cargo@v1
+      with:
+        command: test
+        args: --locked --release --all

  test-others:
    name: Tests on ${{ matrix.os }}
@ -63,47 +64,19 @@ jobs:
      matrix:
        os: [macos-12, windows-2022]
    steps:
-      - uses: actions/checkout@v3
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
-      - name: Run cargo check without any default features
-        uses: actions-rs/cargo@v1
-        with:
-          command: build
-          args: --locked --release --no-default-features --all
-      - name: Run cargo test
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --locked --release --all
-
-  test-all-features:
-    name: Tests all features on cron schedule only
-    runs-on: ubuntu-latest
-    container:
-      # Use ubuntu-18.04 to compile with glibc 2.27, which is what production expects
-      image: ubuntu:18.04
-    if: github.event_name == 'schedule'
-    steps:
-      - uses: actions/checkout@v3
-      - name: Install needed dependencies
-        run: |
-          apt-get update
-          apt-get install --assume-yes build-essential curl
-      - uses: actions-rs/toolchain@v1
-        with:
-          toolchain: stable
-          override: true
-      - name: Run cargo build with all features
-        uses: actions-rs/cargo@v1
-        with:
-          command: build
-          args: --workspace --locked --release --all-features
-      - name: Run cargo test with all features
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          args: --workspace --locked --release --all-features
+    - uses: actions/checkout@v3
+#     - name: Cache dependencies
+#       uses: Swatinem/rust-cache@v2.2.0
+    - name: Run cargo check without any default features
+      uses: actions-rs/cargo@v1
+      with:
+        command: build
+        args: --locked --release --no-default-features --all
+    - name: Run cargo test
+      uses: actions-rs/cargo@v1
+      with:
+        command: test
+        args: --locked --release --all

  # We run tests in debug also, to make sure that the debug_assertions are hit
  test-debug:
@ -122,8 +95,8 @@ jobs:
        with:
          toolchain: stable
          override: true
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
+      # - name: Cache dependencies
+      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@ -141,8 +114,8 @@ jobs:
          toolchain: 1.67.0
          override: true
          components: clippy
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
+      # - name: Cache dependencies
+      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
@ -161,8 +134,8 @@ jobs:
          toolchain: nightly
          override: true
          components: rustfmt
-      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
+      # - name: Cache dependencies
+      #   uses: Swatinem/rust-cache@v2.2.0
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
--- a/.github/workflows/uffizzi-build.yml
+++ b/.github/workflows/uffizzi-build.yml
@ -23,7 +23,7 @@ jobs:
          target: x86_64-unknown-linux-musl

      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.1
+        uses: Swatinem/rust-cache@v2.2.0

      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
@ -46,14 +46,14 @@ jobs:

      - name: Docker metadata
        id: meta
-        uses: docker/metadata-action@v4
+        uses: docker/metadata-action@v3
        with:
          images: registry.uffizzi.com/${{ env.UUID_TAG }}
          tags: | 
            type=raw,value=60d

      - name: Build Image
-        uses: docker/build-push-action@v4
+        uses: docker/build-push-action@v3
        with:
          context: ./
          file: .github/uffizzi/Dockerfile
--- a/Cargo.lock
+++ b/Cargo.lock
@ -252,7 +252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9e8b47f52ea9bae42228d07ec09eb676433d7c4ed1ebdf0f1d1c29ed446f1ab8"
 dependencies = [
 "cfg-if",
- "cipher 0.3.0",
+ "cipher",
 "cpufeatures",
 "opaque-debug",
 ]
@ -523,17 +523,6 @@ version = "3.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "572f695136211188308f16ad2ca5c851a712c464060ae6974944458eb83880ba"

-[[package]]
-name = "bus"
-version = "2.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80cb4625f5b60155ff1018c9d4ce2e38bf5ae3e5780dfab9fa68bb44a6b751e2"
-dependencies = [
- "crossbeam-channel",
- "num_cpus",
- "parking_lot_core",
-]
-
 [[package]]
 name = "byte-unit"
 version = "4.0.18"
@ -652,17 +641,6 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

-[[package]]
-name = "chacha20"
-version = "0.9.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7fc89c7c5b9e7a02dfe45cd2367bae382f9ed31c61ca8debe5f827c420a2f08"
-dependencies = [
- "cfg-if",
- "cipher 0.4.4",
- "cpufeatures",
-]
-
 [[package]]
 name = "change-detection"
 version = "1.2.0"
@ -734,16 +712,6 @@ dependencies = [
 "generic-array",
 ]

-[[package]]
-name = "cipher"
-version = "0.4.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
-dependencies = [
- "crypto-common",
- "inout",
-]
-
 [[package]]
 name = "clap"
 version = "3.2.23"
@ -802,24 +770,6 @@ dependencies = [
 "os_str_bytes",
 ]

-[[package]]
-name = "cluster"
-version = "1.1.0"
-dependencies = [
- "bus",
- "crossbeam",
- "ductile",
- "log",
- "meilisearch-types",
- "roaring",
- "serde",
- "serde_json",
- "synchronoise",
- "thiserror",
- "time",
- "uuid 1.3.0",
-]
-
 [[package]]
 name = "concat-arrays"
 version = "0.1.2"
@ -1198,21 +1148,6 @@ dependencies = [
 "winapi",
 ]

-[[package]]
-name = "ductile"
-version = "0.3.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "12cde25956886749c891a27249630ae99471f1ba05c4a924aad1a6ffe6932812"
-dependencies = [
- "anyhow",
- "bincode",
- "chacha20",
- "crossbeam-channel",
- "log",
- "rand",
- "serde",
-]
-
 [[package]]
 name = "dump"
 version = "1.1.0"
@ -1235,14 +1170,14 @@ dependencies = [
 "tempfile",
 "thiserror",
 "time",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 ]

 [[package]]
 name = "either"
-version = "1.8.1"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91"
+checksum = "90e5c1c8368803113bf0c9584fc495a58b86dc8a29edbf8fe877d21d9507e797"
 dependencies = [
 "serde",
 ]
@ -1441,7 +1376,7 @@ dependencies = [
 "faux",
 "tempfile",
 "thiserror",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 ]

 [[package]]
@ -1960,7 +1895,6 @@ dependencies = [
 "anyhow",
 "big_s",
 "bincode",
- "cluster",
 "crossbeam",
 "csv",
 "derive_builder",
@ -1981,7 +1915,7 @@ dependencies = [
 "tempfile",
 "thiserror",
 "time",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 ]

 [[package]]
@ -1995,15 +1929,6 @@ dependencies = [
 "serde",
 ]

-[[package]]
-name = "inout"
-version = "0.1.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
-dependencies = [
- "generic-array",
-]
-
 [[package]]
 name = "insta"
 version = "1.26.0"
@ -2548,7 +2473,6 @@ dependencies = [
 "bytes",
 "cargo_toml",
 "clap 4.0.32",
- "cluster",
 "crossbeam-channel",
 "deserr",
 "dump",
@ -2609,7 +2533,7 @@ dependencies = [
 "tokio-stream",
 "toml",
 "urlencoding",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 "vergen",
 "walkdir",
 "yaup",
@ -2621,7 +2545,6 @@ name = "meilisearch-auth"
 version = "1.1.0"
 dependencies = [
 "base64 0.13.1",
- "cluster",
 "enum-iterator",
 "hmac",
 "maplit",
@ -2633,7 +2556,7 @@ dependencies = [
 "sha2",
 "thiserror",
 "time",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 ]

 [[package]]
@ -2663,7 +2586,7 @@ dependencies = [
 "thiserror",
 "time",
 "tokio",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 ]

 [[package]]
@ -2738,7 +2661,7 @@ dependencies = [
 "tempfile",
 "thiserror",
 "time",
- "uuid 1.3.0",
+ "uuid 1.2.2",
 ]

 [[package]]
@ -3576,9 +3499,9 @@ checksum = "58bc9567378fc7690d6b2addae4e60ac2eeea07becb2c64b9f218b53865cba2a"

 [[package]]
 name = "serde"
-version = "1.0.155"
+version = "1.0.152"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71f2b4817415c6d4210bfe1c7bfcf4801b2d904cb4d0e1a8fdb651013c9e86b8"
+checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
 dependencies = [
 "serde_derive",
 ]
@ -3594,9 +3517,9 @@ dependencies = [

 [[package]]
 name = "serde_derive"
-version = "1.0.155"
+version = "1.0.152"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d071a94a3fac4aff69d023a7f411e33f40f3483f8c5190b1953822b6b76d7630"
+checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
 dependencies = [
 "proc-macro2",
 "quote",
@ -3605,9 +3528,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.94"
+version = "1.0.91"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1c533a59c9d8a93a09c6ab31f0fd5e5f4dd1b8fc9434804029839884765d04ea"
+checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
 dependencies = [
 "indexmap",
 "itoa 1.0.5",
@ -3893,18 +3816,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"

 [[package]]
 name = "thiserror"
-version = "1.0.39"
+version = "1.0.38"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5ab016db510546d856297882807df8da66a16fb8c4101cb8b30054b0d5b2d9c"
+checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
 dependencies = [
 "thiserror-impl",
 ]

 [[package]]
 name = "thiserror-impl"
-version = "1.0.39"
+version = "1.0.38"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5420d42e90af0c38c3290abcca25b9b3bdf379fc9f55c528f53a269d9c9a267e"
+checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
 dependencies = [
 "proc-macro2",
 "quote",
@ -3913,9 +3836,9 @@ dependencies = [

 [[package]]
 name = "time"
-version = "0.3.20"
+version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd0cbfecb4d19b5ea75bb31ad904eb5b9fa13f21079c3b92017ebdf4999a5890"
+checksum = "a561bf4617eebd33bca6434b988f39ed798e527f51a1e797d0ee4f61c0a38376"
 dependencies = [
 "itoa 1.0.5",
 "serde",
@ -3931,9 +3854,9 @@ checksum = "2e153e1f1acaef8acc537e68b44906d2db6436e2b35ac2c6b42640fff91f00fd"

 [[package]]
 name = "time-macros"
-version = "0.2.8"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fd80a657e71da814b8e5d60d3374fc6d35045062245d80224748ae522dd76f36"
+checksum = "d967f99f534ca7e495c575c62638eebc2898a8c84c119b89e250477bc4ba16b2"
 dependencies = [
 "time-core",
 ]
@ -4178,9 +4101,9 @@ dependencies = [

 [[package]]
 name = "uuid"
-version = "1.3.0"
+version = "1.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1674845326ee10d37ca60470760d4288a6f80f304007d92e5c53bab78c9cfd79"
+checksum = "422ee0de9031b5b948b97a8fc04e3aa35230001a722ddd27943e0be31564ce4c"
 dependencies = [
 "getrandom",
 "serde",
--- a/Cargo.toml
+++ b/Cargo.toml
@ -9,7 +9,6 @@ members = [
    "dump",
    "file-store",
    "permissive-json-pointer",
-    "cluster",
    "milli",
    "filter-parser",
    "flatten-serde-json",
--- a/cluster/Cargo.toml
+++ b/cluster/Cargo.toml
@ -1,25 +0,0 @@
-[package]
-name = "cluster"
-publish = false
-
-version.workspace = true
-authors.workspace = true
-description.workspace = true
-homepage.workspace = true
-readme.workspace = true
-edition.workspace = true
-license.workspace = true
-
-[dependencies]
-ductile = "0.3.0"
-serde = { version = "1.0.155", features = ["derive"] }
-serde_json = "1.0.94"
-thiserror = "1.0.39"
-meilisearch-types = { path = "../meilisearch-types" }
-roaring = { version = "0.10.1", features = ["serde"] }
-log = "0.4.17"
-crossbeam = "0.8.2"
-bus = "2.3.0"
-time = "0.3.20"
-uuid = { version = "1.3.0", features = ["v4"] }
-synchronoise = "1.0.1"
--- a/cluster/src/batch.rs
+++ b/cluster/src/batch.rs
@ -1,148 +0,0 @@
-use meilisearch_types::milli::update::IndexDocumentsMethod;
-use meilisearch_types::settings::{Settings, Unchecked};
-use meilisearch_types::tasks::TaskId;
-use roaring::RoaringBitmap;
-use serde::{Deserialize, Serialize};
-use time::OffsetDateTime;
-use uuid::Uuid;
-
-/// Represents a combination of tasks that can all be processed at the same time.
-///
-/// A batch contains the set of tasks that it represents (accessible through
-/// [`self.ids()`](Batch::ids)), as well as additional information on how to
-/// be processed.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum Batch {
-    TaskCancelation {
-        /// The task cancelation itself.
-        task: TaskId,
-        /// The date and time at which the previously processing tasks started.
-        previous_started_at: OffsetDateTime,
-        /// The list of tasks that were processing when this task cancelation appeared.
-        previous_processing_tasks: RoaringBitmap,
-    },
-    TaskDeletion(TaskId),
-    SnapshotCreation(Vec<TaskId>),
-    Dump(TaskId),
-    IndexOperation {
-        op: IndexOperation,
-        must_create_index: bool,
-    },
-    IndexCreation {
-        index_uid: String,
-        primary_key: Option<String>,
-        task: TaskId,
-    },
-    IndexUpdate {
-        index_uid: String,
-        primary_key: Option<String>,
-        task: TaskId,
-    },
-    IndexDeletion {
-        index_uid: String,
-        tasks: Vec<TaskId>,
-        index_has_been_created: bool,
-    },
-    IndexSwap {
-        task: TaskId,
-    },
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum DocumentOperation {
-    Add(Uuid),
-    Delete(Vec<String>),
-}
-
-/// A [batch](Batch) that combines multiple tasks operating on an index.
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum IndexOperation {
-    DocumentOperation {
-        index_uid: String,
-        primary_key: Option<String>,
-        method: IndexDocumentsMethod,
-        documents_counts: Vec<u64>,
-        operations: Vec<DocumentOperation>,
-        tasks: Vec<TaskId>,
-    },
-    DocumentDeletion {
-        index_uid: String,
-        // The vec associated with each document deletion task.
-        documents: Vec<Vec<String>>,
-        tasks: Vec<TaskId>,
-    },
-    DocumentClear {
-        index_uid: String,
-        tasks: Vec<TaskId>,
-    },
-    Settings {
-        index_uid: String,
-        // The boolean indicates if it's a settings deletion or creation.
-        settings: Vec<(bool, Settings<Unchecked>)>,
-        tasks: Vec<TaskId>,
-    },
-    DocumentClearAndSetting {
-        index_uid: String,
-        cleared_tasks: Vec<TaskId>,
-
-        // The boolean indicates if it's a settings deletion or creation.
-        settings: Vec<(bool, Settings<Unchecked>)>,
-        settings_tasks: Vec<TaskId>,
-    },
-    SettingsAndDocumentOperation {
-        index_uid: String,
-
-        primary_key: Option<String>,
-        method: IndexDocumentsMethod,
-        documents_counts: Vec<u64>,
-        operations: Vec<DocumentOperation>,
-        document_import_tasks: Vec<TaskId>,
-
-        // The boolean indicates if it's a settings deletion or creation.
-        settings: Vec<(bool, Settings<Unchecked>)>,
-        settings_tasks: Vec<TaskId>,
-    },
-}
-
-impl Batch {
-    pub fn ids(&self) -> impl Iterator<Item = TaskId> {
-        type Ret = Box<dyn Iterator<Item = TaskId>>;
-
-        match self {
-            Batch::TaskCancelation { task, .. } => Box::new(std::iter::once(*task)) as Ret,
-            Batch::TaskDeletion(task) => Box::new(std::iter::once(*task)) as Ret,
-            Batch::SnapshotCreation(tasks) => Box::new(tasks.clone().into_iter()) as Ret,
-            Batch::Dump(task) => Box::new(std::iter::once(*task)) as Ret,
-            Batch::IndexOperation { op, .. } => match op {
-                IndexOperation::DocumentOperation { tasks, .. } => {
-                    Box::new(tasks.clone().into_iter()) as Ret
-                }
-                IndexOperation::DocumentDeletion { tasks, .. } => {
-                    Box::new(tasks.clone().into_iter()) as Ret
-                }
-                IndexOperation::DocumentClear { tasks, .. } => {
-                    Box::new(tasks.clone().into_iter()) as Ret
-                }
-                IndexOperation::Settings { tasks, .. } => {
-                    Box::new(tasks.clone().into_iter()) as Ret
-                }
-                IndexOperation::DocumentClearAndSetting {
-                    cleared_tasks, settings_tasks, ..
-                } => {
-                    Box::new(cleared_tasks.clone().into_iter().chain(settings_tasks.clone())) as Ret
-                }
-                IndexOperation::SettingsAndDocumentOperation {
-                    document_import_tasks,
-                    settings_tasks,
-                    ..
-                } => Box::new(
-                    document_import_tasks.clone().into_iter().chain(settings_tasks.clone()),
-                ) as Ret,
-            },
-            Batch::IndexCreation { task, .. } => Box::new(std::iter::once(*task)) as Ret,
-            Batch::IndexUpdate { task, .. } => Box::new(std::iter::once(*task)) as Ret,
-            Batch::IndexDeletion { tasks, .. } => Box::new(tasks.clone().into_iter()) as Ret,
-            Batch::IndexSwap { task } => Box::new(std::iter::once(*task)) as Ret,
-        }
-    }
-}
--- a/cluster/src/leader.rs
+++ b/cluster/src/leader.rs
@ -1,276 +0,0 @@
-use std::net::ToSocketAddrs;
-use std::sync::atomic::{AtomicUsize, Ordering};
-use std::sync::{atomic, Arc, Mutex, RwLock};
-use std::time::Duration;
-
-use bus::{Bus, BusReader};
-use crossbeam::channel::{unbounded, Receiver, Sender};
-use ductile::{ChannelReceiver, ChannelSender, ChannelServer};
-use log::{info, warn};
-use meilisearch_types::keys::Key;
-use meilisearch_types::tasks::Task;
-use synchronoise::SignalEvent;
-use uuid::Uuid;
-
-use crate::batch::Batch;
-use crate::{ApiKeyOperation, Consistency, FollowerMsg, LeaderMsg};
-
-#[derive(Clone)]
-pub struct Leader {
-    task_ready_to_commit: Receiver<u32>,
-    broadcast_to_follower: Sender<LeaderMsg>,
-    needs_key_sender: Sender<Sender<Vec<Key>>>,
-    needs_key_receiver: Receiver<Sender<Vec<Key>>>,
-
-    pub wake_up: Arc<SignalEvent>,
-
-    new_followers: Arc<AtomicUsize>,
-    active_followers: Arc<AtomicUsize>,
-
-    batch_id: Arc<RwLock<u32>>,
-}
-
-impl Leader {
-    pub fn new(
-        listen_on: impl ToSocketAddrs + Send + 'static,
-        master_key: Option<String>,
-    ) -> Leader {
-        let new_followers = Arc::new(AtomicUsize::new(0));
-        let active_followers = Arc::new(AtomicUsize::new(1));
-        let wake_up = Arc::new(SignalEvent::auto(true));
-        let (broadcast_to_follower, process_batch_receiver) = unbounded();
-        let (task_finished_sender, task_finished_receiver) = unbounded();
-        let (needs_key_sender, needs_key_receiver) = unbounded();
-
-        let nf = new_followers.clone();
-        let af = active_followers.clone();
-        let wu = wake_up.clone();
-        std::thread::spawn(move || {
-            Self::listener(
-                listen_on,
-                master_key,
-                nf,
-                af,
-                wu,
-                process_batch_receiver,
-                task_finished_sender,
-            )
-        });
-
-        Leader {
-            task_ready_to_commit: task_finished_receiver,
-            broadcast_to_follower,
-            needs_key_sender,
-            needs_key_receiver,
-
-            wake_up,
-
-            new_followers,
-            active_followers,
-            batch_id: Arc::default(),
-        }
-    }
-
-    pub fn has_new_followers(&self) -> bool {
-        self.new_followers.load(Ordering::Relaxed) != 0
-    }
-
-    /// Takes all the necessary channels to chat with the scheduler and gives them
-    /// to each new follower.
-    fn listener(
-        listen_on: impl ToSocketAddrs,
-        master_key: Option<String>,
-        new_followers: Arc<AtomicUsize>,
-        active_followers: Arc<AtomicUsize>,
-        wake_up: Arc<SignalEvent>,
-        broadcast_to_follower: Receiver<LeaderMsg>,
-        task_finished: Sender<u32>,
-    ) {
-        let listener: ChannelServer<LeaderMsg, FollowerMsg> = if let Some(ref master_key) =
-            master_key
-        {
-            let mut enc = [0; 32];
-            let master_key = master_key.as_bytes();
-            if master_key.len() < 32 {
-                warn!("Master key is not secure, use a longer master key (at least 32 bytes long)");
-            }
-            enc.iter_mut().zip(master_key).for_each(|(enc, mk)| *enc = *mk);
-            info!("Listening with encryption enabled");
-            ChannelServer::bind_with_enc(listen_on, enc).unwrap()
-        } else {
-            ChannelServer::bind(listen_on).unwrap()
-        };
-
-        info!("Ready to the receive connections");
-
-        // We're going to broadcast all the batches to all our followers
-        let bus: Bus<LeaderMsg> = Bus::new(10);
-        let bus = Arc::new(Mutex::new(bus));
-        let b = bus.clone();
-
-        std::thread::spawn(move || loop {
-            let msg = broadcast_to_follower.recv().expect("Main thread is dead");
-            b.lock().unwrap().broadcast(msg);
-        });
-
-        for (sender, receiver, _addr) in listener {
-            let task_finished = task_finished.clone();
-            let nf = new_followers.clone();
-            let af = active_followers.clone();
-            let wu = wake_up.clone();
-
-            let process_batch = bus.lock().unwrap().add_rx();
-
-            std::thread::spawn(move || {
-                Self::follower(sender, receiver, nf, af, wu, process_batch, task_finished)
-            });
-        }
-    }
-
-    /// Allow a follower to chat with the scheduler
-    fn follower(
-        sender: ChannelSender<LeaderMsg>,
-        receiver: ChannelReceiver<FollowerMsg>,
-        new_followers: Arc<AtomicUsize>,
-        active_followers: Arc<AtomicUsize>,
-        wake_up: Arc<SignalEvent>,
-        mut broadcast_to_follower: BusReader<LeaderMsg>,
-        task_finished: Sender<u32>,
-    ) {
-        let size = new_followers.fetch_add(1, Ordering::Relaxed) + 1;
-        wake_up.signal();
-        info!("A new follower joined the cluster. {} members.", size);
-
-        loop {
-            if let msg @ LeaderMsg::JoinFromDump(_) =
-                broadcast_to_follower.recv().expect("Main thread died")
-            {
-                // we exit the new_follower state and become an active follower even though
-                // the dump will takes some time to index
-                new_followers.fetch_sub(1, Ordering::Relaxed);
-                let size = active_followers.fetch_add(1, Ordering::Relaxed) + 1;
-                info!("A new follower became active. {} active members.", size);
-
-                sender.send(msg).unwrap();
-                break;
-            }
-        }
-
-        // send messages to the follower
-        std::thread::spawn(move || loop {
-            let msg = broadcast_to_follower.recv().expect("Main thread died");
-            match msg {
-                LeaderMsg::JoinFromDump(_) => (),
-                msg => {
-                    if sender.send(msg).is_err() {
-                        // the follower died, the logging and cluster size update should be done
-                        // in the other thread
-                        break;
-                    }
-                }
-            }
-        });
-
-        // receive messages from the follower
-        loop {
-            match receiver.recv() {
-                Err(_) => break,
-                Ok(msg) => match msg {
-                    FollowerMsg::ReadyToCommit(id) => {
-                        task_finished.send(id).expect("Can't reach the main thread")
-                    }
-                    FollowerMsg::RegisterNewTask(_) => todo!(),
-                },
-            }
-        }
-
-        // if we exited from the previous loop it means the follower is down and should
-        // be removed from the cluster
-        let size = active_followers.fetch_sub(1, atomic::Ordering::Relaxed) - 1;
-        info!("A follower left the cluster. {} members.", size);
-    }
-
-    // ============= Everything related to the setup of the cluster
-    pub fn join_me(&self, dump: Vec<u8>) {
-        self.broadcast_to_follower
-            .send(LeaderMsg::JoinFromDump(dump))
-            .expect("Lost the link with the followers");
-    }
-
-    // ============= Everything related to the scheduler
-
-    pub fn starts_batch(&self, batch: Batch) {
-        let mut batch_id = self.batch_id.write().unwrap();
-
-        info!("Send the batch to process to the followers");
-        *batch_id += 1;
-
-        self.broadcast_to_follower
-            .send(LeaderMsg::StartBatch { id: *batch_id, batch })
-            .expect("Can't reach the cluster");
-    }
-
-    pub fn commit(&self, consistency_level: Consistency) {
-        info!("Wait until enough followers are ready to commit a batch");
-
-        let batch_id = self.batch_id.write().unwrap();
-
-        let mut nodes_ready_to_commit = 1;
-
-        loop {
-            let size = self.active_followers.load(atomic::Ordering::Relaxed);
-
-            info!("{nodes_ready_to_commit} nodes are ready to commit for a cluster size of {size}");
-            let all = nodes_ready_to_commit == size;
-
-            match consistency_level {
-                Consistency::One if nodes_ready_to_commit >= 1 || all => break,
-                Consistency::Two if nodes_ready_to_commit >= 2 || all => break,
-                Consistency::Quorum if nodes_ready_to_commit >= (size / 2) || all => break,
-                Consistency::All if all => break,
-                _ => (),
-            }
-
-            // we can't wait forever here because if a node dies the cluster size might get updated while we're stuck
-            match self.task_ready_to_commit.recv_timeout(Duration::new(1, 0)) {
-                Ok(id) if id == *batch_id => nodes_ready_to_commit += 1,
-                _ => continue,
-            };
-        }
-
-        info!("Tells all the follower to commit");
-
-        self.broadcast_to_follower.send(LeaderMsg::Commit(*batch_id)).unwrap();
-    }
-
-    pub fn register_new_task(&self, task: Task, update_file: Option<Vec<u8>>) {
-        info!("Tells all the follower to register a new task");
-        self.broadcast_to_follower
-            .send(LeaderMsg::RegisterNewTask { task, update_file })
-            .expect("Main thread is dead");
-    }
-
-    // ============= Everything related to the api-keys
-
-    pub fn insert_key(&self, key: Key) {
-        self.broadcast_to_follower
-            .send(LeaderMsg::ApiKeyOperation(ApiKeyOperation::Insert(key)))
-            .unwrap()
-    }
-
-    pub fn delete_key(&self, uuid: Uuid) {
-        self.broadcast_to_follower
-            .send(LeaderMsg::ApiKeyOperation(ApiKeyOperation::Delete(uuid)))
-            .unwrap()
-    }
-
-    pub fn needs_keys(&self) -> Sender<Vec<Key>> {
-        self.needs_key_receiver.recv().expect("The cluster is dead")
-    }
-
-    pub fn get_keys(&self) -> Vec<Key> {
-        let (send, rcv) = crossbeam::channel::bounded(1);
-        self.needs_key_sender.send(send).expect("The cluster is dead");
-        rcv.recv().expect("The auth controller is dead")
-    }
-}
--- a/cluster/src/lib.rs
+++ b/cluster/src/lib.rs
@ -1,231 +0,0 @@
-use std::net::ToSocketAddrs;
-use std::str::FromStr;
-use std::sync::{Arc, RwLock};
-
-use batch::Batch;
-use crossbeam::channel::{unbounded, Receiver, Sender};
-use ductile::{connect_channel, connect_channel_with_enc, ChannelReceiver, ChannelSender};
-use log::{info, warn};
-use meilisearch_types::keys::Key;
-use meilisearch_types::tasks::{KindWithContent, Task};
-use serde::{Deserialize, Serialize};
-
-pub mod batch;
-mod leader;
-
-pub use leader::Leader;
-use uuid::Uuid;
-
-#[derive(Debug, thiserror::Error)]
-pub enum Error {
-    #[error("Network issue occured")]
-    NetworkIssue,
-    #[error("Internal error: {0}")]
-    SerdeJson(#[from] serde_json::Error),
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum LeaderMsg {
-    /// A dump to join the cluster
-    JoinFromDump(Vec<u8>),
-    /// Starts a new batch
-    StartBatch { id: u32, batch: Batch },
-    /// Tell the follower to commit the update asap
-    Commit(u32),
-    /// Tell the follower to commit the update asap
-    RegisterNewTask { task: Task, update_file: Option<Vec<u8>> },
-
-    /// Tell the follower to commit the update asap
-    ApiKeyOperation(ApiKeyOperation),
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum FollowerMsg {
-    // Let the leader knows you're ready to commit
-    ReadyToCommit(u32),
-    RegisterNewTask(KindWithContent),
-}
-
-#[derive(Default, Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
-#[serde(rename_all = "lowercase")]
-pub enum Consistency {
-    One,
-    Two,
-    Quorum,
-    #[default]
-    All,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub enum ApiKeyOperation {
-    Insert(Key),
-    Delete(Uuid),
-}
-
-impl std::fmt::Display for Consistency {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Consistency::One => write!(f, "one"),
-            Consistency::Two => write!(f, "two"),
-            Consistency::Quorum => write!(f, "quorum"),
-            Consistency::All => write!(f, "all"),
-        }
-    }
-}
-
-impl FromStr for Consistency {
-    type Err = String;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        match s {
-            "one" => Ok(Consistency::One),
-            "two" => Ok(Consistency::Two),
-            "quorum" => Ok(Consistency::Quorum),
-            "all" => Ok(Consistency::All),
-            s => Err(format!(
-                "Unexpected value `{s}`, expected one of `one`, `two`, `quorum`, `all`"
-            )),
-        }
-    }
-}
-
-#[derive(Clone)]
-pub enum Cluster {
-    Leader(Leader),
-    Follower(Follower),
-}
-
-#[derive(Clone)]
-pub struct Follower {
-    sender: ChannelSender<FollowerMsg>,
-
-    get_batch: Receiver<(u32, Batch)>,
-    must_commit: Receiver<u32>,
-    register_new_task: Receiver<(Task, Option<Vec<u8>>)>,
-
-    api_key_op: Receiver<ApiKeyOperation>,
-
-    batch_id: Arc<RwLock<u32>>,
-}
-
-impl Follower {
-    pub fn join(leader: impl ToSocketAddrs, master_key: Option<String>) -> (Follower, Vec<u8>) {
-        let (sender, receiver) = if let Some(master_key) = master_key {
-            let mut enc = [0; 32];
-            let master_key = master_key.as_bytes();
-            if master_key.len() < 32 {
-                warn!("Master key is not secure, use a longer master key (at least 32 bytes long)");
-            }
-            enc.iter_mut().zip(master_key).for_each(|(enc, mk)| *enc = *mk);
-            info!("Connecting with encryption enabled");
-            connect_channel_with_enc(leader, &enc).unwrap()
-        } else {
-            connect_channel(leader).unwrap()
-        };
-
-        info!("Connection to the leader established");
-
-        info!("Waiting for the leader to contact us");
-        let state = receiver.recv().unwrap();
-
-        let dump = match state {
-            LeaderMsg::JoinFromDump(dump) => dump,
-            msg => panic!("Received unexpected message {msg:?}"),
-        };
-
-        let (get_batch_sender, get_batch_receiver) = unbounded();
-        let (must_commit_sender, must_commit_receiver) = unbounded();
-        let (register_task_sender, register_task_receiver) = unbounded();
-        let (create_api_key_sender, create_api_key_receiver) = unbounded();
-
-        std::thread::spawn(move || {
-            Self::router(
-                receiver,
-                get_batch_sender,
-                must_commit_sender,
-                register_task_sender,
-                create_api_key_sender,
-            );
-        });
-
-        (
-            Follower {
-                sender,
-                get_batch: get_batch_receiver,
-                must_commit: must_commit_receiver,
-                register_new_task: register_task_receiver,
-                api_key_op: create_api_key_receiver,
-                batch_id: Arc::default(),
-            },
-            dump,
-        )
-    }
-
-    fn router(
-        receiver: ChannelReceiver<LeaderMsg>,
-        get_batch: Sender<(u32, Batch)>,
-        must_commit: Sender<u32>,
-        register_new_task: Sender<(Task, Option<Vec<u8>>)>,
-        api_key_op: Sender<ApiKeyOperation>,
-    ) {
-        loop {
-            match receiver.recv().expect("Lost connection to the leader") {
-                LeaderMsg::JoinFromDump(_) => {
-                    warn!("Received a join from dump msg but I’m already running : ignoring the message")
-                }
-                LeaderMsg::StartBatch { id, batch } => {
-                    info!("Starting to process a new batch");
-                    get_batch.send((id, batch)).expect("Lost connection to the main thread")
-                }
-                LeaderMsg::Commit(id) => {
-                    info!("Must commit");
-                    must_commit.send(id).expect("Lost connection to the main thread")
-                }
-                LeaderMsg::RegisterNewTask { task, update_file } => {
-                    info!("Registered a new task");
-                    register_new_task
-                        .send((task, update_file))
-                        .expect("Lost connection to the main thread")
-                }
-                LeaderMsg::ApiKeyOperation(key) => {
-                    api_key_op.send(key).expect("Lost connection to the main thread")
-                }
-            }
-        }
-    }
-
-    pub fn get_new_batch(&self) -> Batch {
-        info!("Get new batch called");
-        let (id, batch) = self.get_batch.recv().expect("Lost connection to the leader");
-        info!("Got a new batch");
-        *self.batch_id.write().unwrap() = id;
-        batch
-    }
-
-    pub fn ready_to_commit(&self) {
-        info!("I'm ready to commit");
-        let batch_id = self.batch_id.read().unwrap();
-
-        self.sender.send(FollowerMsg::ReadyToCommit(*batch_id)).unwrap();
-
-        loop {
-            let id = self.must_commit.recv().expect("Lost connection to the leader");
-            #[allow(clippy::comparison_chain)]
-            if id == *batch_id {
-                break;
-            } else if id > *batch_id {
-                panic!("We missed a batch");
-            }
-        }
-        info!("I got the right to commit");
-    }
-
-    pub fn get_new_task(&self) -> (Task, Option<Vec<u8>>) {
-        self.register_new_task.recv().expect("Lost connection to the leader")
-    }
-
-    pub fn api_key_operation(&self) -> ApiKeyOperation {
-        info!("Creating a new api key");
-        self.api_key_op.recv().expect("Lost connection to the leader")
-    }
-}
--- a/index-scheduler/Cargo.toml
+++ b/index-scheduler/Cargo.toml
@ -13,8 +13,6 @@ license.workspace = true
 [dependencies]
 anyhow = "1.0.64"
 bincode = "1.3.3"
-cluster = { path = "../cluster" }
-crossbeam = "0.8.2"
 csv = "1.1.6"
 derive_builder = "0.11.2"
 dump = { path = "../dump" }
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@ -22,8 +22,7 @@ use std::ffi::OsStr;
 use std::fs::{self, File};
 use std::io::BufWriter;

-use crossbeam::utils::Backoff;
-use dump::{DumpWriter, IndexMetadata};
+use dump::IndexMetadata;
 use log::{debug, error, info};
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
@ -42,14 +41,14 @@ use uuid::Uuid;

 use crate::autobatcher::{self, BatchKind};
 use crate::utils::{self, swap_index_uid_in_task};
-use crate::{Cluster, Error, IndexScheduler, ProcessingTasks, Result, TaskId};
+use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};

 /// Represents a combination of tasks that can all be processed at the same time.
 ///
 /// A batch contains the set of tasks that it represents (accessible through
 /// [`self.ids()`](Batch::ids)), as well as additional information on how to
 /// be processed.
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub(crate) enum Batch {
    TaskCancelation {
        /// The task cancelation itself.
@ -86,14 +85,14 @@ pub(crate) enum Batch {
    },
 }

-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub(crate) enum DocumentOperation {
    Add(Uuid),
    Delete(Vec<String>),
 }

 /// A [batch](Batch) that combines multiple tasks operating on an index.
-#[derive(Debug, Clone)]
+#[derive(Debug)]
 pub(crate) enum IndexOperation {
    DocumentOperation {
        index_uid: String,
@ -587,12 +586,6 @@ impl IndexScheduler {
                    _ => unreachable!(),
                }

-                match &self.cluster {
-                    Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
-                    Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
-                    None => (),
-                }
-
                // We must only remove the content files if the transaction is successfully committed
                // and if errors occurs when we are deleting files we must do our best to delete
                // everything. We do not return the encountered errors when deleting the content
@ -636,13 +629,6 @@ impl IndexScheduler {
                    }
                    _ => unreachable!(),
                }
-
-                match &self.cluster {
-                    Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
-                    Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
-                    None => (),
-                }
-
                wtxn.commit()?;
                Ok(vec![task])
            }
@ -689,9 +675,6 @@ impl IndexScheduler {
                }

                // 3. Snapshot every indexes
-                // TODO we are opening all of the indexes it can be too much we should unload all
-                //      of the indexes we are trying to open. It would be even better to only unload
-                //      the ones that were opened by us. Or maybe use a LRU in the index mapper.
                for result in self.index_mapper.index_mapping.iter(&rtxn)? {
                    let (name, uuid) = result?;
                    let index = self.index_mapper.index(&rtxn, name)?;
@ -728,6 +711,14 @@ impl IndexScheduler {
                // 5.3 Change the permission to make the snapshot readonly
                let mut permissions = file.metadata()?.permissions();
                permissions.set_readonly(true);
+                #[cfg(unix)]
+                {
+                    use std::os::unix::fs::PermissionsExt;
+                    #[allow(clippy::non_octal_unix_permissions)]
+                    //                     rwxrwxrwx
+                    permissions.set_mode(0b100100100);
+                }
+
                file.set_permissions(permissions)?;

                for task in &mut tasks {
@ -737,9 +728,96 @@ impl IndexScheduler {
                Ok(tasks)
            }
            Batch::Dump(mut task) => {
-                // TODO: It would be better to use the started_at from the task instead of generating a new one
                let started_at = OffsetDateTime::now_utc();
-                let dump = self.create_dump(&task, &started_at)?;
+                let (keys, instance_uid) =
+                    if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
+                        (keys, instance_uid)
+                    } else {
+                        unreachable!();
+                    };
+                let dump = dump::DumpWriter::new(*instance_uid)?;
+
+                // 1. dump the keys
+                let mut dump_keys = dump.create_keys()?;
+                for key in keys {
+                    dump_keys.push_key(key)?;
+                }
+                dump_keys.flush()?;
+
+                let rtxn = self.env.read_txn()?;
+
+                // 2. dump the tasks
+                let mut dump_tasks = dump.create_tasks_queue()?;
+                for ret in self.all_tasks.iter(&rtxn)? {
+                    let (_, mut t) = ret?;
+                    let status = t.status;
+                    let content_file = t.content_uuid();
+
+                    // In the case we're dumping ourselves we want to be marked as finished
+                    // to not loop over ourselves indefinitely.
+                    if t.uid == task.uid {
+                        let finished_at = OffsetDateTime::now_utc();
+
+                        // We're going to fake the date because we don't know if everything is going to go well.
+                        // But we need to dump the task as finished and successful.
+                        // If something fail everything will be set appropriately in the end.
+                        t.status = Status::Succeeded;
+                        t.started_at = Some(started_at);
+                        t.finished_at = Some(finished_at);
+                    }
+                    let mut dump_content_file = dump_tasks.push_task(&t.into())?;
+
+                    // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
+                    if let Some(content_file) = content_file {
+                        if status == Status::Enqueued {
+                            let content_file = self.file_store.get_update(content_file)?;
+
+                            let reader = DocumentsBatchReader::from_reader(content_file)
+                                .map_err(milli::Error::from)?;
+
+                            let (mut cursor, documents_batch_index) =
+                                reader.into_cursor_and_fields_index();
+
+                            while let Some(doc) =
+                                cursor.next_document().map_err(milli::Error::from)?
+                            {
+                                dump_content_file.push_document(&obkv_to_object(
+                                    &doc,
+                                    &documents_batch_index,
+                                )?)?;
+                            }
+                            dump_content_file.flush()?;
+                        }
+                    }
+                }
+                dump_tasks.flush()?;
+
+                // 3. Dump the indexes
+                self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
+                    let rtxn = index.read_txn()?;
+                    let metadata = IndexMetadata {
+                        uid: uid.to_owned(),
+                        primary_key: index.primary_key(&rtxn)?.map(String::from),
+                        created_at: index.created_at(&rtxn)?,
+                        updated_at: index.updated_at(&rtxn)?,
+                    };
+                    let mut index_dumper = dump.create_index(uid, &metadata)?;
+
+                    let fields_ids_map = index.fields_ids_map(&rtxn)?;
+                    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
+
+                    // 3.1. Dump the documents
+                    for ret in index.all_documents(&rtxn)? {
+                        let (_id, doc) = ret?;
+                        let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
+                        index_dumper.push_document(&document)?;
+                    }
+
+                    // 3.2. Dump the settings
+                    let settings = meilisearch_types::settings::settings(index, &rtxn)?;
+                    index_dumper.settings(&settings)?;
+                    Ok(())
+                })?;

                let dump_uid = started_at.format(format_description!(
                    "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
@ -767,13 +845,6 @@ impl IndexScheduler {

                let mut index_wtxn = index.write_txn()?;
                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
-
-                match &self.cluster {
-                    Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
-                    Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
-                    None => (),
-                }
-
                index_wtxn.commit()?;

                Ok(tasks)
@ -872,13 +943,6 @@ impl IndexScheduler {
                for swap in swaps {
                    self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
                }
-
-                match &self.cluster {
-                    Some(Cluster::Leader(leader)) => leader.commit(self.consistency_level),
-                    Some(Cluster::Follower(follower)) => follower.ready_to_commit(),
-                    None => (),
-                }
-
                wtxn.commit()?;
                task.status = Status::Succeeded;
                Ok(vec![task])
@ -886,99 +950,6 @@ impl IndexScheduler {
        }
    }

-    pub(crate) fn create_dump(
-        &self,
-        task: &Task,
-        started_at: &OffsetDateTime,
-    ) -> Result<DumpWriter> {
-        let (keys, instance_uid) =
-            if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
-                (keys, instance_uid)
-            } else {
-                unreachable!();
-            };
-        let dump = dump::DumpWriter::new(*instance_uid)?;
-
-        // 1. dump the keys
-        let mut dump_keys = dump.create_keys()?;
-        for key in keys {
-            dump_keys.push_key(key)?;
-        }
-        dump_keys.flush()?;
-
-        let rtxn = self.env.read_txn()?;
-
-        // 2. dump the tasks
-        let mut dump_tasks = dump.create_tasks_queue()?;
-        for ret in self.all_tasks.iter(&rtxn)? {
-            let (_, mut t) = ret?;
-            let status = t.status;
-            let content_file = t.content_uuid();
-
-            // In the case we're dumping ourselves we want to be marked as finished
-            // to not loop over ourselves indefinitely.
-            if t.uid == task.uid {
-                let finished_at = OffsetDateTime::now_utc();
-
-                // We're going to fake the date because we don't know if everything is going to go well.
-                // But we need to dump the task as finished and successful.
-                // If something fail everything will be set appropriately in the end.
-                t.status = Status::Succeeded;
-                t.started_at = Some(*started_at);
-                t.finished_at = Some(finished_at);
-            }
-            let mut dump_content_file = dump_tasks.push_task(&t.into())?;
-
-            // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
-            if let Some(content_file) = content_file {
-                if status == Status::Enqueued {
-                    let content_file = self.file_store.get_update(content_file)?;
-
-                    let reader = DocumentsBatchReader::from_reader(content_file)
-                        .map_err(milli::Error::from)?;
-
-                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
-
-                    while let Some(doc) = cursor.next_document().map_err(milli::Error::from)? {
-                        dump_content_file
-                            .push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
-                    }
-                    dump_content_file.flush()?;
-                }
-            }
-        }
-        dump_tasks.flush()?;
-
-        // 3. Dump the indexes
-        self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
-            let rtxn = index.read_txn()?;
-            let metadata = IndexMetadata {
-                uid: uid.to_owned(),
-                primary_key: index.primary_key(&rtxn)?.map(String::from),
-                created_at: index.created_at(&rtxn)?,
-                updated_at: index.updated_at(&rtxn)?,
-            };
-            let mut index_dumper = dump.create_index(uid, &metadata)?;
-
-            let fields_ids_map = index.fields_ids_map(&rtxn)?;
-            let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
-
-            // 3.1. Dump the documents
-            for ret in index.all_documents(&rtxn)? {
-                let (_id, doc) = ret?;
-                let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
-                index_dumper.push_document(&document)?;
-            }
-
-            // 3.2. Dump the settings
-            let settings = meilisearch_types::settings::settings(index, &rtxn)?;
-            index_dumper.settings(&settings)?;
-            Ok(())
-        })?;
-
-        Ok(dump)
-    }
-
    /// Swap the index `lhs` with the index `rhs`.
    fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
        // 1. Verify that both lhs and rhs are existing indexes
@ -1409,274 +1380,4 @@ impl IndexScheduler {

        Ok(content_files_to_delete)
    }
-
-    pub(crate) fn get_batch_from_cluster_batch(
-        &self,
-        batch: cluster::batch::Batch,
-    ) -> Result<Batch> {
-        use cluster::batch::Batch as CBatch;
-
-        let mut rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
-
-        for id in batch.ids() {
-            let backoff = Backoff::new();
-            let id = BEU32::new(id);
-
-            loop {
-                if self.all_tasks.get(&rtxn, &id)?.is_some() {
-                    info!("Found the task_id");
-                    break;
-                }
-                info!("The task is not present in the task queue, we wait");
-                // we need to drop the txn to make a write visible
-                drop(rtxn);
-                backoff.spin();
-                rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
-            }
-        }
-
-        Ok(match batch {
-            CBatch::TaskCancelation { task, previous_started_at, previous_processing_tasks } => {
-                Batch::TaskCancelation {
-                    task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
-                    previous_started_at,
-                    previous_processing_tasks,
-                }
-            }
-            CBatch::TaskDeletion(task) => {
-                Batch::TaskDeletion(self.get_existing_tasks(&rtxn, Some(task))?[0].clone())
-            }
-            CBatch::SnapshotCreation(tasks) => {
-                Batch::SnapshotCreation(self.get_existing_tasks(&rtxn, tasks)?)
-            }
-            CBatch::Dump(task) => {
-                Batch::Dump(self.get_existing_tasks(&rtxn, Some(task))?[0].clone())
-            }
-            CBatch::IndexOperation { op, must_create_index } => Batch::IndexOperation {
-                op: self.get_index_op_from_cluster_index_op(&rtxn, op)?,
-                must_create_index,
-            },
-            CBatch::IndexCreation { index_uid, primary_key, task } => Batch::IndexCreation {
-                index_uid,
-                primary_key,
-                task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
-            },
-            CBatch::IndexUpdate { index_uid, primary_key, task } => Batch::IndexUpdate {
-                index_uid,
-                primary_key,
-                task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone(),
-            },
-            CBatch::IndexDeletion { index_uid, tasks, index_has_been_created } => {
-                Batch::IndexDeletion {
-                    index_uid,
-                    tasks: self.get_existing_tasks(&rtxn, tasks)?,
-                    index_has_been_created,
-                }
-            }
-            CBatch::IndexSwap { task } => {
-                Batch::IndexSwap { task: self.get_existing_tasks(&rtxn, Some(task))?[0].clone() }
-            }
-        })
-    }
-
-    pub(crate) fn get_index_op_from_cluster_index_op(
-        &self,
-        rtxn: &RoTxn,
-        op: cluster::batch::IndexOperation,
-    ) -> Result<IndexOperation> {
-        use cluster::batch::IndexOperation as COp;
-
-        Ok(match op {
-            COp::DocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations,
-                tasks,
-            } => IndexOperation::DocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations: operations.into_iter().map(|op| op.into()).collect(),
-                tasks: self.get_existing_tasks(rtxn, tasks)?,
-            },
-            COp::DocumentDeletion { index_uid, documents, tasks } => {
-                IndexOperation::DocumentDeletion {
-                    index_uid,
-                    documents,
-                    tasks: self.get_existing_tasks(rtxn, tasks)?,
-                }
-            }
-            COp::DocumentClear { index_uid, tasks } => IndexOperation::DocumentClear {
-                index_uid,
-                tasks: self.get_existing_tasks(rtxn, tasks)?,
-            },
-            COp::Settings { index_uid, settings, tasks } => IndexOperation::Settings {
-                index_uid,
-                settings,
-                tasks: self.get_existing_tasks(rtxn, tasks)?,
-            },
-            COp::DocumentClearAndSetting { index_uid, cleared_tasks, settings, settings_tasks } => {
-                IndexOperation::DocumentClearAndSetting {
-                    index_uid,
-                    cleared_tasks: self.get_existing_tasks(rtxn, cleared_tasks)?,
-                    settings,
-                    settings_tasks: self.get_existing_tasks(rtxn, settings_tasks)?,
-                }
-            }
-            COp::SettingsAndDocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations,
-                document_import_tasks,
-                settings,
-                settings_tasks,
-            } => IndexOperation::SettingsAndDocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations: operations.into_iter().map(|op| op.into()).collect(),
-                document_import_tasks: self.get_existing_tasks(rtxn, document_import_tasks)?,
-                settings,
-                settings_tasks: self.get_existing_tasks(rtxn, settings_tasks)?,
-            },
-        })
-    }
-}
-
-impl From<Batch> for cluster::batch::Batch {
-    fn from(batch: Batch) -> Self {
-        use cluster::batch::Batch as CBatch;
-
-        match batch {
-            Batch::TaskCancelation { task, previous_started_at, previous_processing_tasks } => {
-                CBatch::TaskCancelation {
-                    task: task.uid,
-                    previous_started_at,
-                    previous_processing_tasks,
-                }
-            }
-            Batch::TaskDeletion(task) => CBatch::TaskDeletion(task.uid),
-            Batch::SnapshotCreation(task) => {
-                CBatch::SnapshotCreation(task.into_iter().map(|task| task.uid).collect())
-            }
-            Batch::Dump(task) => CBatch::Dump(task.uid),
-            Batch::IndexOperation { op, must_create_index } => {
-                CBatch::IndexOperation { op: op.into(), must_create_index }
-            }
-            Batch::IndexCreation { index_uid, primary_key, task } => {
-                CBatch::IndexCreation { index_uid, primary_key, task: task.uid }
-            }
-            Batch::IndexUpdate { index_uid, primary_key, task } => {
-                CBatch::IndexUpdate { index_uid, primary_key, task: task.uid }
-            }
-            Batch::IndexDeletion { index_uid, tasks, index_has_been_created } => {
-                CBatch::IndexDeletion {
-                    index_uid,
-                    tasks: tasks.into_iter().map(|task| task.uid).collect(),
-                    index_has_been_created,
-                }
-            }
-            Batch::IndexSwap { task } => CBatch::IndexSwap { task: task.uid },
-        }
-    }
-}
-
-impl From<IndexOperation> for cluster::batch::IndexOperation {
-    fn from(op: IndexOperation) -> Self {
-        use cluster::batch::IndexOperation as COp;
-        match op {
-            IndexOperation::DocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations,
-                tasks,
-            } => COp::DocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations: operations.into_iter().map(|op| op.into()).collect(),
-                tasks: tasks.into_iter().map(|task| task.uid).collect(),
-            },
-            IndexOperation::DocumentDeletion { index_uid, documents, tasks } => {
-                COp::DocumentDeletion {
-                    index_uid,
-                    documents,
-                    tasks: tasks.into_iter().map(|task| task.uid).collect(),
-                }
-            }
-            IndexOperation::DocumentClear { index_uid, tasks } => COp::DocumentClear {
-                index_uid,
-                tasks: tasks.into_iter().map(|task| task.uid).collect(),
-            },
-            IndexOperation::Settings { index_uid, settings, tasks } => COp::Settings {
-                index_uid,
-                settings,
-                tasks: tasks.into_iter().map(|task| task.uid).collect(),
-            },
-            IndexOperation::DocumentClearAndSetting {
-                index_uid,
-                cleared_tasks,
-                settings,
-                settings_tasks,
-            } => COp::DocumentClearAndSetting {
-                index_uid,
-                cleared_tasks: cleared_tasks.into_iter().map(|task| task.uid).collect(),
-                settings,
-                settings_tasks: settings_tasks.into_iter().map(|task| task.uid).collect(),
-            },
-            IndexOperation::SettingsAndDocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations,
-                document_import_tasks,
-                settings,
-                settings_tasks,
-            } => COp::SettingsAndDocumentOperation {
-                index_uid,
-                primary_key,
-                method,
-                documents_counts,
-                operations: operations.into_iter().map(|op| op.into()).collect(),
-                document_import_tasks: document_import_tasks
-                    .into_iter()
-                    .map(|task| task.uid)
-                    .collect(),
-                settings,
-                settings_tasks: settings_tasks.into_iter().map(|task| task.uid).collect(),
-            },
-        }
-    }
-}
-
-impl From<DocumentOperation> for cluster::batch::DocumentOperation {
-    fn from(op: DocumentOperation) -> Self {
-        use cluster::batch::DocumentOperation as COp;
-
-        match op {
-            DocumentOperation::Add(uuid) => COp::Add(uuid),
-            DocumentOperation::Delete(docs) => COp::Delete(docs),
-        }
-    }
-}
-
-impl From<cluster::batch::DocumentOperation> for DocumentOperation {
-    fn from(op: cluster::batch::DocumentOperation) -> Self {
-        use cluster::batch::DocumentOperation as COp;
-
-        match op {
-            COp::Add(uuid) => DocumentOperation::Add(uuid),
-            COp::Delete(docs) => DocumentOperation::Delete(docs),
-        }
-    }
 }
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@ -33,8 +33,6 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
        snapshots_path: _,
        auth_path: _,
        version_file_path: _,
-        cluster: _,
-        consistency_level: _,
        test_breakpoint_sdr: _,
        planned_failures: _,
        run_loop_iteration: _,
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -31,7 +31,6 @@ mod uuid_codec;
 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;

-use std::io::Write;
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
@ -39,12 +38,9 @@ use std::sync::atomic::Ordering::Relaxed;
 use std::sync::{Arc, RwLock};
 use std::time::Duration;

-use batch::Batch;
-use cluster::{Cluster, Consistency};
 use dump::{KindDump, TaskDump, UpdateFile};
 pub use error::Error;
 use file_store::FileStore;
-use log::info;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{self, Database, Env, RoTxn};
@ -54,7 +50,6 @@ use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
 use roaring::RoaringBitmap;
-use serde::Deserialize;
 use synchronoise::SignalEvent;
 use time::OffsetDateTime;
 use utils::{filter_out_references_to_newer_tasks, keep_tasks_within_datetimes, map_bound};
@ -307,11 +302,6 @@ pub struct IndexScheduler {
    /// The path to the version file of Meilisearch.
    pub(crate) version_file_path: PathBuf,

-    /// The role in the cluster
-    pub(crate) cluster: Option<Cluster>,
-    /// The Consistency level used by the leader. Ignored if the node is not in a leader in cluster mode.
-    pub(crate) consistency_level: Consistency,
-
    // ================= test
    // The next entry is dedicated to the tests.
    /// Provide a way to set a breakpoint in multiple part of the scheduler.
@ -331,24 +321,6 @@ pub struct IndexScheduler {
    run_loop_iteration: Arc<RwLock<usize>>,
 }

-#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
-pub enum ClusterMode {
-    Leader,
-    Follower,
-}
-
-impl std::str::FromStr for ClusterMode {
-    type Err = ();
-
-    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
-        match s {
-            "leader" => Ok(ClusterMode::Leader),
-            "follower" => Ok(ClusterMode::Follower),
-            _ => Err(()),
-        }
-    }
-}
-
 impl IndexScheduler {
    fn private_clone(&self) -> IndexScheduler {
        IndexScheduler {
@ -371,8 +343,6 @@ impl IndexScheduler {
            dumps_path: self.dumps_path.clone(),
            auth_path: self.auth_path.clone(),
            version_file_path: self.version_file_path.clone(),
-            cluster: self.cluster.clone(),
-            consistency_level: self.consistency_level,
            #[cfg(test)]
            test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
            #[cfg(test)]
@ -387,8 +357,6 @@ impl IndexScheduler {
    /// Create an index scheduler and start its run loop.
    pub fn new(
        options: IndexSchedulerOptions,
-        cluster: Option<Cluster>,
-        consistency_level: Consistency,
        #[cfg(test)] test_breakpoint_sdr: crossbeam::channel::Sender<(Breakpoint, bool)>,
        #[cfg(test)] planned_failures: Vec<(usize, tests::FailureLocation)>,
    ) -> Result<Self> {
@ -448,8 +416,6 @@ impl IndexScheduler {
            snapshots_path: options.snapshots_path,
            auth_path: options.auth_path,
            version_file_path: options.version_file_path,
-            cluster,
-            consistency_level,

            #[cfg(test)]
            test_breakpoint_sdr,
@ -542,26 +508,6 @@ impl IndexScheduler {
    /// only once per index scheduler.
    fn run(&self) {
        let run = self.private_clone();
-
-        // if we're a follower we starts a thread to register the tasks coming from the leader
-        if let Some(Cluster::Follower(ref follower)) = self.cluster {
-            let this = self.private_clone();
-            let follower = follower.clone();
-            std::thread::spawn(move || loop {
-                let (task, content) = follower.get_new_task();
-                this.register_raw_task(task, content);
-            });
-        } else if let Some(Cluster::Leader(ref leader)) = self.cluster {
-            // we need a way to let the leader come out of its loop if a new follower joins the cluster
-            let cluster = leader.wake_up.clone();
-            let scheduler = self.wake_up.clone();
-
-            std::thread::spawn(move || loop {
-                cluster.wait();
-                scheduler.signal();
-            });
-        }
-
        std::thread::Builder::new()
            .name(String::from("scheduler"))
            .spawn(move || {
@ -919,16 +865,6 @@ impl IndexScheduler {
            return Err(e.into());
        }

-        if let Some(Cluster::Leader(leader)) = &self.cluster {
-            let update_file = if let Some(uuid) = task.content_uuid() {
-                let path = self.file_store.get_update_path(uuid);
-                Some(std::fs::read(path).unwrap())
-            } else {
-                None
-            };
-            leader.register_new_task(task.clone(), update_file);
-        }
-
        // If the registered task is a task cancelation
        // we inform the processing tasks to stop (if necessary).
        if let KindWithContent::TaskCancelation { tasks, .. } = kind {
@ -1058,44 +994,6 @@ impl IndexScheduler {
        Ok(task)
    }

-    /// /!\ should only be used when you're a follower in cluster mode
-    pub fn register_raw_task(&self, task: Task, content_file: Option<Vec<u8>>) {
-        if let Some(content) = content_file {
-            let uuid = task.content_uuid().expect("bad task");
-            let (_, mut file) = self.file_store.new_update_with_uuid(uuid.as_u128()).unwrap();
-            file.write_all(&content).unwrap();
-            file.persist().unwrap();
-        }
-
-        let mut wtxn = self.env.write_txn().unwrap();
-
-        self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task).unwrap();
-
-        for index in task.indexes() {
-            self.update_index(&mut wtxn, index, |bitmap| {
-                bitmap.insert(task.uid);
-            })
-            .unwrap();
-        }
-
-        self.update_status(&mut wtxn, task.status, |bitmap| {
-            bitmap.insert(task.uid);
-        })
-        .unwrap();
-
-        self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
-            (bitmap.insert(task.uid));
-        })
-        .unwrap();
-
-        utils::insert_task_datetime(&mut wtxn, self.enqueued_at, task.enqueued_at, task.uid)
-            .unwrap();
-
-        wtxn.commit().unwrap();
-
-        self.wake_up.signal();
-    }
-
    /// Create a new index without any associated task.
    pub fn create_raw_index(
        &self,
@ -1152,15 +1050,14 @@ impl IndexScheduler {
            self.breakpoint(Breakpoint::Start);
        }

-        info!("before getting a new batch");
-        let batch = match self.get_or_create_next_batch()? {
-            Some(batch) => batch,
-            None => return Ok(TickOutcome::WaitForSignal),
-        };
-        info!("after getting a new batch");
+        let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
+        let batch =
+            match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
+                Some(batch) => batch,
+                None => return Ok(TickOutcome::WaitForSignal),
+            };
        let index_uid = batch.index_uid().map(ToOwned::to_owned);
-
-        // TODO cluster: Should we send the starting date as well so everyone is in sync?
+        drop(rtxn);

        // 1. store the starting date with the bitmap of processing tasks.
        let mut ids = batch.ids();
@ -1289,63 +1186,6 @@ impl IndexScheduler {
        Ok(TickOutcome::TickAgain(processed_tasks))
    }

-    /// If there is no cluster or if leader -> create a new batch
-    /// If follower -> wait till the leader gives us a batch to process
-    fn get_or_create_next_batch(&self) -> Result<Option<Batch>> {
-        info!("inside get or create next batch");
-
-        let batch = match &self.cluster {
-            None | Some(Cluster::Leader(_)) => {
-                let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
-                self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))?
-            }
-            Some(Cluster::Follower(follower)) => {
-                let batch = follower.get_new_batch();
-                Some(self.get_batch_from_cluster_batch(batch)?)
-            }
-        };
-
-        if let Some(Cluster::Leader(leader)) = &self.cluster {
-            // first, onboard the new followers
-            if leader.has_new_followers() {
-                info!("New followers are trying to join the cluster");
-                let started_at = OffsetDateTime::now_utc();
-                let dump = self
-                    .create_dump(
-                        &Task {
-                            uid: TaskId::MAX,
-                            enqueued_at: started_at,
-                            started_at: Some(started_at),
-                            finished_at: Some(started_at),
-                            error: None,
-                            canceled_by: None,
-                            details: None,
-                            status: Status::Enqueued,
-                            kind: KindWithContent::DumpCreation {
-                                keys: leader.get_keys(),
-                                // TODO cluster: should we unify the instance_uid between every instances?
-                                instance_uid: None,
-                            },
-                        },
-                        &started_at,
-                    )
-                    .unwrap();
-
-                let mut buffer = Vec::new();
-                // TODO cluster: stop writing everything in RAM
-                dump.persist_to(&mut buffer).unwrap();
-
-                leader.join_me(buffer);
-            }
-
-            // second, starts processing the batch
-            if let Some(ref batch) = batch {
-                leader.starts_batch(batch.clone().into());
-            }
-        }
-        Ok(batch)
-    }
-
    pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
        match task.content_uuid() {
            Some(content_file) => self.delete_update_file(content_file),
@ -1461,8 +1301,7 @@ mod tests {
                autobatching_enabled,
            };

-            let index_scheduler =
-                Self::new(options, None, Consistency::default(), sender, planned_failures).unwrap();
+            let index_scheduler = Self::new(options, sender, planned_failures).unwrap();

            // To be 100% consistent between all test we're going to start the scheduler right now
            // and ensure it's in the expected starting state.
--- a/meilisearch-auth/Cargo.toml
+++ b/meilisearch-auth/Cargo.toml
@ -12,7 +12,6 @@ license.workspace = true

 [dependencies]
 base64 = "0.13.1"
-cluster = { path = "../cluster" }
 enum-iterator = "1.1.3"
 hmac = "0.12.1"
 maplit = "1.0.2"
--- a/meilisearch-auth/src/lib.rs
+++ b/meilisearch-auth/src/lib.rs
@ -6,7 +6,6 @@ use std::collections::{HashMap, HashSet};
 use std::path::Path;
 use std::sync::Arc;

-use cluster::Cluster;
 use error::{AuthControllerError, Result};
 use maplit::hashset;
 use meilisearch_types::index_uid_pattern::IndexUidPattern;
@ -22,52 +21,17 @@ use uuid::Uuid;
 pub struct AuthController {
    store: Arc<HeedAuthStore>,
    master_key: Option<String>,
-
-    cluster: Option<Cluster>,
 }

 impl AuthController {
-    pub fn new(
-        db_path: impl AsRef<Path>,
-        master_key: &Option<String>,
-        cluster: Option<Cluster>,
-    ) -> Result<Self> {
+    pub fn new(db_path: impl AsRef<Path>, master_key: &Option<String>) -> Result<Self> {
        let store = HeedAuthStore::new(db_path)?;

        if store.is_empty()? {
            generate_default_keys(&store)?;
        }

-        let this = Self {
-            store: Arc::new(store),
-            master_key: master_key.clone(),
-            cluster: cluster.clone(),
-        };
-
-        if let Some(Cluster::Follower(follower)) = cluster {
-            let this = this.clone();
-
-            std::thread::spawn(move || loop {
-                match follower.api_key_operation() {
-                    cluster::ApiKeyOperation::Insert(key) => {
-                        this.store.put_api_key(key).expect("Inconsistency with the leader");
-                    }
-                    cluster::ApiKeyOperation::Delete(uuid) => {
-                        this.store.delete_api_key(uuid).expect("Inconsistency with the leader");
-                    }
-                }
-            });
-        } else if let Some(Cluster::Leader(leader)) = cluster {
-            let this = this.clone();
-
-            std::thread::spawn(move || loop {
-                let channel = leader.needs_keys();
-                let keys = this.list_keys().expect("auth controller is dead");
-                channel.send(keys).expect("Cluster is dead");
-            });
-        }
-
-        Ok(this)
+        Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
    }

    /// Return the size of the `AuthController` database in bytes.
@ -78,13 +42,7 @@ impl AuthController {
    pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
        match self.store.get_api_key(create_key.uid)? {
            Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
-            None => {
-                let key = self.store.put_api_key(create_key.to_key())?;
-                if let Some(Cluster::Leader(ref leader)) = self.cluster {
-                    leader.insert_key(key.clone());
-                }
-                Ok(key)
-            }
+            None => self.store.put_api_key(create_key.to_key()),
        }
    }

@ -99,12 +57,7 @@ impl AuthController {
            name => key.name = name.set(),
        };
        key.updated_at = OffsetDateTime::now_utc();
-
-        let key = self.store.put_api_key(key)?;
-        if let Some(Cluster::Leader(ref leader)) = self.cluster {
-            leader.insert_key(key.clone());
-        }
-        Ok(key)
+        self.store.put_api_key(key)
    }

    pub fn get_key(&self, uid: Uuid) -> Result<Key> {
@ -147,9 +100,6 @@ impl AuthController {

    pub fn delete_key(&self, uid: Uuid) -> Result<()> {
        if self.store.delete_api_key(uid)? {
-            if let Some(Cluster::Leader(ref leader)) = self.cluster {
-                leader.delete_key(uid);
-            }
            Ok(())
        } else {
            Err(AuthControllerError::ApiKeyNotFound(uid.to_string()))
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@ -24,7 +24,6 @@ bstr = "1.0.1"
 byte-unit = { version = "4.0.14", default-features = false, features = ["std", "serde"] }
 bytes = "1.2.1"
 clap = { version = "4.0.9", features = ["derive", "env"] }
-cluster = { path = "../cluster" }
 crossbeam-channel = "0.5.6"
 deserr = "0.5.0"
 dump = { path = "../dump" }
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@ -282,7 +282,6 @@ impl From<Opt> for Infos {
            dump_dir,
            log_level,
            indexer_options,
-            cluster_configuration: _,
            config_file_path,
            #[cfg(all(not(debug_assertions), feature = "analytics"))]
                no_analytics: _,
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@ -11,8 +11,7 @@ pub mod routes;
 pub mod search;

 use std::fs::File;
-use std::io::{BufReader, BufWriter, Write};
-use std::net::ToSocketAddrs;
+use std::io::{BufReader, BufWriter};
 use std::path::Path;
 use std::sync::Arc;
 use std::thread;
@ -26,12 +25,11 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest};
 use analytics::Analytics;
 use anyhow::bail;
-use cluster::{Cluster, Follower, Leader};
 use error::PayloadError;
 use extractors::payload::PayloadConfig;
 use http::header::CONTENT_TYPE;
 use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
-use log::{error, info};
+use log::error;
 use meilisearch_auth::AuthController;
 use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -145,7 +143,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
        // the db is empty and the snapshot exists, import it
        if empty_db && snapshot_path_exists {
            match compression::from_tar_gz(snapshot_path, &opt.db_path) {
-                Ok(()) => open_or_create_database_unchecked(opt, None, OnFailure::RemoveDb)?,
+                Ok(()) => open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?,
                Err(e) => {
                    std::fs::remove_dir_all(&opt.db_path)?;
                    return Err(e);
@ -162,14 +160,14 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
            bail!("snapshot doesn't exist at {}", snapshot_path.display())
        // the snapshot and the db exist, and we can ignore the snapshot because of the ignore_snapshot_if_db_exists flag
        } else {
-            open_or_create_database(opt, empty_db, None)?
+            open_or_create_database(opt, empty_db)?
        }
    } else if let Some(ref path) = opt.import_dump {
        let src_path_exists = path.exists();
        // the db is empty and the dump exists, import it
        if empty_db && src_path_exists {
            let (mut index_scheduler, mut auth_controller) =
-                open_or_create_database_unchecked(opt, None, OnFailure::RemoveDb)?;
+                open_or_create_database_unchecked(opt, OnFailure::RemoveDb)?;
            match import_dump(&opt.db_path, path, &mut index_scheduler, &mut auth_controller) {
                Ok(()) => (index_scheduler, auth_controller),
                Err(e) => {
@ -189,62 +187,10 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
        // the dump and the db exist and we can ignore the dump because of the ignore_dump_if_db_exists flag
        // or, the dump is missing but we can ignore that because of the ignore_missing_dump flag
        } else {
-            open_or_create_database(opt, empty_db, None)?
-        }
-    } else if let Some(ref cluster) = opt.cluster_configuration.experimental_enable_ha {
-        match cluster.as_str() {
-            "leader" => {
-                info!("Starting as a leader");
-                let mut addr = opt.http_addr.to_socket_addrs().unwrap().next().unwrap();
-                addr.set_port(6666);
-                open_or_create_database(
-                    opt,
-                    empty_db,
-                    Some(Cluster::Leader(Leader::new(addr, opt.master_key.clone()))),
-                )?
-            }
-            "follower" => {
-                info!("Starting as a follower");
-                if !empty_db {
-                    panic!("Can't start as a follower with an already existing data.ms");
-                }
-                let mut addr = opt
-                    .cluster_configuration
-                    .leader
-                    .as_ref()
-                    .expect("Can't be a follower without a leader")
-                    .to_socket_addrs()
-                    .unwrap()
-                    .next()
-                    .unwrap();
-                addr.set_port(6666);
-
-                let (follower, dump) = Follower::join(addr, opt.master_key.clone());
-                let mut dump_file = tempfile::NamedTempFile::new().unwrap();
-                dump_file.write_all(&dump).unwrap();
-
-                let (mut index_scheduler, mut auth_controller) = open_or_create_database_unchecked(
-                    opt,
-                    Some(Cluster::Follower(follower)),
-                    OnFailure::RemoveDb,
-                )?;
-                match import_dump(
-                    &opt.db_path,
-                    dump_file.path(),
-                    &mut index_scheduler,
-                    &mut auth_controller,
-                ) {
-                    Ok(()) => (index_scheduler, auth_controller),
-                    Err(e) => {
-                        std::fs::remove_dir_all(&opt.db_path)?;
-                        return Err(e);
-                    }
-                }
-            }
-            _ => panic!("Available values for the cluster mode are leader and follower"),
+            open_or_create_database(opt, empty_db)?
        }
    } else {
-        open_or_create_database(opt, empty_db, None)?
+        open_or_create_database(opt, empty_db)?
    };

    // We create a loop in a thread that registers snapshotCreation tasks
@ -269,34 +215,27 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Auth
 /// Try to start the IndexScheduler and AuthController without checking the VERSION file or anything.
 fn open_or_create_database_unchecked(
    opt: &Opt,
-    cluster: Option<Cluster>,
    on_failure: OnFailure,
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
    // we don't want to create anything in the data.ms yet, thus we
    // wrap our two builders in a closure that'll be executed later.
-    let auth_controller = AuthController::new(&opt.db_path, &opt.master_key, cluster.clone());
-
+    let auth_controller = AuthController::new(&opt.db_path, &opt.master_key);
    let index_scheduler_builder = || -> anyhow::Result<_> {
-        Ok(IndexScheduler::new(
-            IndexSchedulerOptions {
-                version_file_path: opt.db_path.join(VERSION_FILE_NAME),
-                auth_path: opt.db_path.join("auth"),
-                tasks_path: opt.db_path.join("tasks"),
-                update_file_path: opt.db_path.join("update_files"),
-                indexes_path: opt.db_path.join("indexes"),
-                snapshots_path: opt.snapshot_dir.clone(),
-                dumps_path: opt.dump_dir.clone(),
-                task_db_size: opt.max_task_db_size.get_bytes() as usize,
-                index_base_map_size: opt.max_index_size.get_bytes() as usize,
-                indexer_config: (&opt.indexer_options).try_into()?,
-                autobatching_enabled: true,
-                index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes()
-                    as usize,
-                index_count: DEFAULT_INDEX_COUNT,
-            },
-            cluster,
-            opt.cluster_configuration.consistency,
-        )?)
+        Ok(IndexScheduler::new(IndexSchedulerOptions {
+            version_file_path: opt.db_path.join(VERSION_FILE_NAME),
+            auth_path: opt.db_path.join("auth"),
+            tasks_path: opt.db_path.join("tasks"),
+            update_file_path: opt.db_path.join("update_files"),
+            indexes_path: opt.db_path.join("indexes"),
+            snapshots_path: opt.snapshot_dir.clone(),
+            dumps_path: opt.dump_dir.clone(),
+            task_db_size: opt.max_task_db_size.get_bytes() as usize,
+            index_base_map_size: opt.max_index_size.get_bytes() as usize,
+            indexer_config: (&opt.indexer_options).try_into()?,
+            autobatching_enabled: true,
+            index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
+            index_count: DEFAULT_INDEX_COUNT,
+        })?)
    };

    match (
@ -318,13 +257,12 @@ fn open_or_create_database_unchecked(
 fn open_or_create_database(
    opt: &Opt,
    empty_db: bool,
-    cluster: Option<Cluster>,
 ) -> anyhow::Result<(IndexScheduler, AuthController)> {
    if !empty_db {
        check_version_file(&opt.db_path)?;
    }

-    open_or_create_database_unchecked(opt, cluster, OnFailure::KeepDb)
+    open_or_create_database_unchecked(opt, OnFailure::KeepDb)
 }

 fn import_dump(
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@ -12,7 +12,6 @@ use std::{env, fmt, fs};

 use byte_unit::{Byte, ByteError};
 use clap::Parser;
-use cluster::Consistency;
 use meilisearch_types::milli::update::IndexerConfig;
 use rustls::server::{
    AllowAnyAnonymousOrAuthenticatedClient, AllowAnyAuthenticatedClient, ServerSessionMemoryCache,
@ -298,10 +297,6 @@ pub struct Opt {
    #[clap(flatten)]
    pub indexer_options: IndexerOpts,

-    #[serde(flatten)]
-    #[clap(flatten)]
-    pub cluster_configuration: ClusterOpts,
-
    /// Set the path to a configuration file that should be used to setup the engine.
    /// Format must be TOML.
    #[clap(long)]
@ -390,7 +385,6 @@ impl Opt {
            #[cfg(all(not(debug_assertions), feature = "analytics"))]
            no_analytics,
            experimental_enable_metrics: enable_metrics_route,
-            cluster_configuration: _,
        } = self;
        export_to_env_if_not_present(MEILI_DB_PATH, db_path);
        export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@ -524,21 +518,6 @@ impl IndexerOpts {
    }
 }

-#[derive(Debug, Default, Clone, Parser, Deserialize)]
-pub struct ClusterOpts {
-    #[clap(long)]
-    #[serde(default)]
-    pub experimental_enable_ha: Option<String>,
-
-    #[clap(long)]
-    #[serde(default)]
-    pub leader: Option<String>,
-
-    #[clap(long, default_value_t)]
-    #[serde(default)]
-    pub consistency: Consistency,
-}
-
 impl TryFrom<&IndexerOpts> for IndexerConfig {
    type Error = anyhow::Error;

--- a/meilisearch/src/routes/indexes/mod.rs
+++ b/meilisearch/src/routes/indexes/mod.rs
@ -100,6 +100,8 @@ pub async fn list_indexes(
            Ok(Some(IndexView::new(uid.to_string(), index)?))
        })?;
    // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
+    // The intermediate `collect` is required: without it the build fails because the trait `ExactSizeIterator` is not implemented for `Flatten<IntoIter<Option<IndexView>>>`
+    #[allow(clippy::needless_collect)]
    let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
    let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());

--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@ -378,6 +378,11 @@ pub fn perform_search(
    let mut tokenizer_buidler = TokenizerBuilder::default();
    tokenizer_buidler.create_char_map(true);

+    let script_lang_map = index.script_language(&rtxn)?;
+    if !script_lang_map.is_empty() {
+        tokenizer_buidler.allow_list(&script_lang_map);
+    }
+
    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_buidler.build());
    formatter_builder.crop_marker(query.crop_marker);
    formatter_builder.highlight_prefix(query.highlight_pre_tag);
--- a/meilisearch/tests/auth/errors.rs
+++ b/meilisearch/tests/auth/errors.rs
@ -60,7 +60,7 @@ async fn create_api_key_bad_uid() {
    snapshot!(code, @"400 Bad Request");
    snapshot!(json_string!(response), @r###"
    {
-      "message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-fA-F-], found `o` at 2",
+      "message": "Invalid value at `.uid`: invalid character: expected an optional prefix of `urn:uuid:` followed by [0-9a-zA-Z], found `o` at 2",
      "code": "invalid_api_key_uid",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_api_key_uid"
--- a/meilisearch/tests/snapshot/mod.rs
+++ b/meilisearch/tests/snapshot/mod.rs
@ -1,8 +1,8 @@
 use std::time::Duration;

+use actix_rt::time::sleep;
 use meilisearch::option::ScheduleSnapshot;
 use meilisearch::Opt;
-use tokio::time::sleep;

 use crate::common::server::default_settings;
 use crate::common::{GetAllDocumentsOptions, Server};
@ -23,21 +23,20 @@ macro_rules! verify_snapshot {
                    };
                    let (snapshot, _) = test(snapshot.clone()).await;
                    let (orig, _) = test(orig.clone()).await;
-                    assert_eq!(snapshot, orig);
+                    assert_eq!(snapshot, orig, "Got \n{}\nWhile expecting:\n{}", serde_json::to_string_pretty(&snapshot).unwrap(), serde_json::to_string_pretty(&orig).unwrap());
                }
            )*
    };
 }

 #[actix_rt::test]
-#[ignore] // TODO: unignore
 async fn perform_snapshot() {
    let temp = tempfile::tempdir().unwrap();
    let snapshot_dir = tempfile::tempdir().unwrap();

    let options = Opt {
        snapshot_dir: snapshot_dir.path().to_owned(),
-        schedule_snapshot: ScheduleSnapshot::Enabled(1),
+        schedule_snapshot: ScheduleSnapshot::Enabled(2),
        ..default_settings(temp.path())
    };

@ -61,6 +60,16 @@ async fn perform_snapshot() {
    let temp = tempfile::tempdir().unwrap();

    let snapshot_path = snapshot_dir.path().to_owned().join("db.snapshot");
+    #[cfg_attr(windows, allow(unused))]
+    let snapshot_meta = std::fs::metadata(&snapshot_path).unwrap();
+
+    #[cfg(unix)]
+    {
+        use std::os::unix::fs::PermissionsExt;
+        let mode = snapshot_meta.permissions().mode();
+        //                                                    rwxrwxrwx
+        meili_snap::snapshot!(format!("{:b}", mode), @"1000000100100100");
+    }

    let options = Opt { import_snapshot: Some(snapshot_path), ..default_settings(temp.path()) };

@ -71,7 +80,10 @@ async fn perform_snapshot() {
        // for some reason the db sizes differ. this may be due to the compaction options we have
        // set when performing the snapshot
        //server.stats(),
-        server.tasks(),
+
+        // The original instance contains the snapshotCreation task, while the snapshotted instance does not. For this reason we cannot compare the whole task queue: we only compare the tasks selected by `from=2`, which must not include the snapshotCreation task.
+        server.tasks_filter("?from=2"),
+
        server.index("test").get_all_documents(GetAllDocumentsOptions::default()),
        server.index("test").settings(),
        server.index("test1").get_all_documents(GetAllDocumentsOptions::default()),
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -1211,11 +1211,22 @@ impl Index {
        let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;

        let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
+        let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new();
+        let mut total = 0;
        for sl in self.script_language_docids.iter(rtxn)? {
            let ((script, language), docids) = sl?;

            // keep only Languages that contains at least 1 document.
-            if !soft_deleted_documents.is_superset(&docids) {
+            let remaining_documents_count = (docids - &soft_deleted_documents).len();
+            total += remaining_documents_count;
+            if remaining_documents_count > 0 {
+                script_language_doc_count.push((script, language, remaining_documents_count));
+            }
+        }
+
+        let threshold = total / 20; // 5% (arbitrary)
+        for (script, language, count) in script_language_doc_count {
+            if count > threshold {
                if let Some(languages) = script_language.get_mut(&script) {
                    (*languages).push(language);
                } else {
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@ -3,12 +3,14 @@ use std::convert::TryInto;
 use std::fs::File;
 use std::{io, mem, str};

-use charabia::{Language, Script, SeparatorKind, Token, TokenKind, TokenizerBuilder};
+use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
+use obkv::KvReader;
 use roaring::RoaringBitmap;
 use serde_json::Value;

 use super::helpers::{concat_u32s_array, create_sorter, sorter_into_reader, GrenadParameters};
 use crate::error::{InternalError, SerializationError};
+use crate::update::index_documents::MergeFn;
 use crate::{
    absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
 };
@ -33,7 +35,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    let max_memory = indexer.max_memory_by_thread();

    let mut documents_ids = RoaringBitmap::new();
-    let mut script_language_pair = HashMap::new();
+    let mut script_language_docids = HashMap::new();
    let mut docid_word_positions_sorter = create_sorter(
        grenad::SortAlgorithm::Stable,
        concat_u32s_array,
@ -45,11 +47,11 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(

    let mut key_buffer = Vec::new();
    let mut field_buffer = String::new();
-    let mut builder = TokenizerBuilder::new();
+    let mut tokenizer_builder = TokenizerBuilder::new();
    if let Some(stop_words) = stop_words {
-        builder.stop_words(stop_words);
+        tokenizer_builder.stop_words(stop_words);
    }
-    let tokenizer = builder.build();
+    let tokenizer = tokenizer_builder.build();

    let mut cursor = obkv_documents.into_cursor()?;
    while let Some((key, value)) = cursor.move_on_next()? {
@ -57,49 +59,121 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
            .try_into()
            .map(u32::from_be_bytes)
            .map_err(|_| SerializationError::InvalidNumberSerialization)?;
-        let obkv = obkv::KvReader::<FieldId>::new(value);
+        let obkv = KvReader::<FieldId>::new(value);

        documents_ids.push(document_id);
        key_buffer.clear();
        key_buffer.extend_from_slice(&document_id.to_be_bytes());

-        for (field_id, field_bytes) in obkv.iter() {
-            if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
-                let value =
-                    serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
-                field_buffer.clear();
-                if let Some(field) = json_to_string(&value, &mut field_buffer) {
-                    let tokens = process_tokens(tokenizer.tokenize(field))
-                        .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
+        let mut script_language_word_count = HashMap::new();

-                    for (index, token) in tokens {
-                        if let Some(language) = token.language {
-                            let script = token.script;
-                            let entry = script_language_pair
-                                .entry((script, language))
-                                .or_insert_with(RoaringBitmap::new);
-                            entry.push(document_id);
-                        }
-                        let token = token.lemma().trim();
-                        if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
-                            key_buffer.truncate(mem::size_of::<u32>());
-                            key_buffer.extend_from_slice(token.as_bytes());
+        extract_tokens_from_document(
+            &obkv,
+            searchable_fields,
+            &tokenizer,
+            max_positions_per_attributes,
+            &mut key_buffer,
+            &mut field_buffer,
+            &mut script_language_word_count,
+            &mut docid_word_positions_sorter,
+        )?;

-                            let position: u16 = index
-                                .try_into()
-                                .map_err(|_| SerializationError::InvalidNumberSerialization)?;
-                            let position = absolute_from_relative_position(field_id, position);
-                            docid_word_positions_sorter
-                                .insert(&key_buffer, position.to_ne_bytes())?;
+        // if we detect a potential mistake in the language detection,
+        // we rerun the extraction, forcing the tokenizer to only detect the most frequently detected Languages.
+        // context: https://github.com/meilisearch/meilisearch/issues/3565
+        if script_language_word_count.values().any(potential_language_detection_error) {
+            // build an allow list with the most frequent detected languages in the document.
+            let script_language: HashMap<_, _> =
+                script_language_word_count.iter().filter_map(most_frequent_languages).collect();
+
+            // if the allow list is empty, meaning that no Language is considered frequent,
+            // then we don't rerun the extraction.
+            if !script_language.is_empty() {
+                // build a new temporary tokenizer including the allow list.
+                let mut tokenizer_builder = TokenizerBuilder::new();
+                if let Some(stop_words) = stop_words {
+                    tokenizer_builder.stop_words(stop_words);
+                }
+                tokenizer_builder.allow_list(&script_language);
+                let tokenizer = tokenizer_builder.build();
+
+                script_language_word_count.clear();
+
+                // rerun the extraction.
+                extract_tokens_from_document(
+                    &obkv,
+                    searchable_fields,
+                    &tokenizer,
+                    max_positions_per_attributes,
+                    &mut key_buffer,
+                    &mut field_buffer,
+                    &mut script_language_word_count,
+                    &mut docid_word_positions_sorter,
+                )?;
+            }
+        }
+
+        for (script, languages_frequency) in script_language_word_count {
+            for (language, _) in languages_frequency {
+                let entry = script_language_docids
+                    .entry((script, language))
+                    .or_insert_with(RoaringBitmap::new);
+                entry.push(document_id);
+            }
+        }
+    }
+
+    sorter_into_reader(docid_word_positions_sorter, indexer)
+        .map(|reader| (documents_ids, reader, script_language_docids))
+}
+
+#[allow(clippy::too_many_arguments)]
+fn extract_tokens_from_document<T: AsRef<[u8]>>(
+    obkv: &KvReader<FieldId>,
+    searchable_fields: &Option<HashSet<FieldId>>,
+    tokenizer: &Tokenizer<T>,
+    max_positions_per_attributes: u32,
+    key_buffer: &mut Vec<u8>,
+    field_buffer: &mut String,
+    script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
+    docid_word_positions_sorter: &mut grenad::Sorter<MergeFn>,
+) -> Result<()> {
+    for (field_id, field_bytes) in obkv.iter() {
+        if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
+            let value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
+            field_buffer.clear();
+            if let Some(field) = json_to_string(&value, field_buffer) {
+                let tokens = process_tokens(tokenizer.tokenize(field))
+                    .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
+
+                for (index, token) in tokens {
+                    // if a language has been detected for the token, we update the counter.
+                    if let Some(language) = token.language {
+                        let script = token.script;
+                        let entry =
+                            script_language_word_count.entry(script).or_insert_with(Vec::new);
+                        match entry.iter_mut().find(|(l, _)| *l == language) {
+                            Some((_, n)) => *n += 1,
+                            None => entry.push((language, 1)),
                        }
                    }
+                    let token = token.lemma().trim();
+                    if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
+                        key_buffer.truncate(mem::size_of::<u32>());
+                        key_buffer.extend_from_slice(token.as_bytes());
+
+                        let position: u16 = index
+                            .try_into()
+                            .map_err(|_| SerializationError::InvalidNumberSerialization)?;
+                        let position = absolute_from_relative_position(field_id, position);
+                        docid_word_positions_sorter.insert(&key_buffer, position.to_ne_bytes())?;
+                    }
                }
            }
        }
    }

-    sorter_into_reader(docid_word_positions_sorter, indexer)
-        .map(|reader| (documents_ids, reader, script_language_pair))
+    Ok(())
 }

 /// Transform a JSON value into a string that can be indexed.
@ -183,3 +257,36 @@ fn process_tokens<'a>(
        })
        .filter(|(_, t)| t.is_word())
 }
+
+fn potential_language_detection_error(languages_frequency: &Vec<(Language, usize)>) -> bool {
+    if languages_frequency.len() > 1 {
+        let threshold = compute_laguage_frequency_threshold(languages_frequency);
+        languages_frequency.iter().any(|(_, c)| *c <= threshold)
+    } else {
+        false
+    }
+}
+
+fn most_frequent_languages(
+    (script, languages_frequency): (&Script, &Vec<(Language, usize)>),
+) -> Option<(Script, Vec<Language>)> {
+    if languages_frequency.len() > 1 {
+        let threshold = compute_laguage_frequency_threshold(languages_frequency);
+
+        let languages: Vec<_> =
+            languages_frequency.iter().filter(|(_, c)| *c > threshold).map(|(l, _)| *l).collect();
+
+        if languages.is_empty() {
+            None
+        } else {
+            Some((*script, languages))
+        }
+    } else {
+        None
+    }
+}
+
+fn compute_laguage_frequency_threshold(languages_frequency: &[(Language, usize)]) -> usize {
+    let total: usize = languages_frequency.iter().map(|(_, c)| c).sum();
+    total / 10 // 10% is a completely arbitrary value.
+}
Author	SHA1	Message	Date
ManyTheFish	7e2fd82e41	Use Language allow list in the highlighter	2023-03-08 12:44:16 +01:00
ManyTheFish	24c0775c67	Change indexing threshold	2023-03-08 12:36:04 +01:00
ManyTheFish	3092cf0448	Fix clippy errors	2023-03-08 10:53:42 +01:00
ManyTheFish	37d4551e8e	Add a threshold filtering the Languages allowed to be detected at search time	2023-03-07 19:38:01 +01:00
ManyTheFish	da48506f15	Rerun extraction when language detection might have failed	2023-03-07 18:35:26 +01:00
bors[bot]	370d88f626	Merge #3561 3561: Fix the snapshots permissions on unix system r=irevoire a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/3507 The snapshot permissions were wrong after the v0.30 and the huge refacto of the index scheduler. Fix this issue + add a test on the permissions on unix Co-authored-by: Tamo <tamo@meilisearch.com>	2023-03-07 08:51:38 +00:00
Tamo	d34faa8f9c	put back the sleep as it was and fix the from	2023-03-06 18:09:09 +01:00
Tamo	e5d0bef6d8	update a comment	2023-03-06 17:04:24 +01:00
Tamo	e704728ee7	fix the snapshots permissions on unix system	2023-03-06 16:28:40 +01:00