Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-12-05 12:15:42 +00:00)

Compare commits: v1.1.0-rc. ... prototype- (24 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 3b14de0e84 |  |
|  | 7871d12025 |  |
|  | 3fb67f94f7 |  |
|  | cf5145b542 |  |
|  | 31bb61ba99 |  |
|  | e7994cdeb3 |  |
|  | 70c906d4b4 |  |
|  | 0f33a65468 |  |
|  | 7c9a8b1e1b |  |
|  | f45daf8031 |  |
|  | 7935bef4cd |  |
|  | eddefb0e0f |  |
|  | c5f22be6e1 |  |
|  | febc8d1b52 |  |
|  | df3986cd83 |  |
|  | 34ed6518ae |  |
|  | 22219fd88f |  |
|  | a9e17ab8c6 |  |
|  | 2dd948a4a1 |  |
|  | 76cf1bff87 |  |
|  | c0d8eb295d |  |
|  | bcd3f6054a |  |
|  | 3a0314f9de |  |
|  | fa4d8b8348 |  |
```diff
@@ -1,4 +1,4 @@
-name: Benchmarks
+name: Benchmarks (manual)
 
 on:
   workflow_dispatch:
```
```diff
@@ -1,4 +1,4 @@
-name: Benchmarks indexing (push)
+name: Benchmarks of indexing (push)
 
 on:
   push:
```
```diff
@@ -1,4 +1,4 @@
-name: Benchmarks search geo (push)
+name: Benchmarks of search for geo (push)
 
 on:
   push:
```
```diff
@@ -1,4 +1,4 @@
-name: Benchmarks search songs (push)
+name: Benchmarks of search for songs (push)
 
 on:
   push:
```
```diff
@@ -1,4 +1,4 @@
-name: Benchmarks search wikipedia articles (push)
+name: Benchmarks of search for Wikipedia articles (push)
 
 on:
   push:
```
28  .github/workflows/create-issue-dependencies.yml  vendored

```diff
@@ -1,28 +0,0 @@
-name: Create issue to upgrade dependencies
-on:
-  schedule:
-    # Run the first of the month, every 3 month
-    - cron: '0 0 1 */3 *'
-  workflow_dispatch:
-
-jobs:
-  create-issue:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-      - name: Create an issue
-        uses: actions-ecosystem/action-create-issue@v1
-        with:
-          github_token: ${{ secrets.MEILI_BOT_GH_PAT }}
-          title: Upgrade dependencies
-          body: |
-            This issue is about updating Meilisearch dependencies:
-            - [ ] Cargo toml dependencies of Meilisearch; but also the main engine-team repositories that Meilisearch depends on (charabia, heed...)
-            - [ ] If new Rust versions have been released, update the Rust version in the Clippy job of this [GitHub Action file](./.github/workflows/rust.yml)
-
-            ⚠️ To avoid last minute bugs, this issue should only be done at the beginning of the sprint!
-
-            The GitHub action dependencies are managed by [Dependabot](./.github/dependabot.yml)
-          labels: |
-            dependencies
-            maintenance
```
24  .github/workflows/dependency-issue.yml  vendored  Normal file

```diff
@@ -0,0 +1,24 @@
+name: Create issue to upgrade dependencies
+
+on:
+  schedule:
+    # Run the first of the month, every 3 month
+    - cron: '0 0 1 */3 *'
+  workflow_dispatch:
+
+jobs:
+  create-issue:
+    runs-on: ubuntu-latest
+    env:
+      ISSUE_TEMPLATE: issue-template.md
+      GH_TOKEN: ${{ secrets.MEILI_BOT_GH_PAT }}
+    steps:
+      - uses: actions/checkout@v3
+      - name: Download the issue template
+        run: curl -s https://raw.githubusercontent.com/meilisearch/engine-team/main/issue-templates/dependency-issue.md > $ISSUE_TEMPLATE
+      - name: Create issue
+        run: |
+          gh issue create \
+            --title 'Upgrade dependencies' \
+            --label 'dependencies,maintenance' \
+            --body-file $ISSUE_TEMPLATE
```
```diff
@@ -1,4 +1,4 @@
-name: Publish to APT repository & Homebrew
+name: Publish to APT & Homebrew
 
 on:
   release:
@@ -35,7 +35,7 @@ jobs:
       - name: Build deb package
         run: cargo deb -p meilisearch -o target/debian/meilisearch.deb
       - name: Upload debian pkg to release
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/debian/meilisearch.deb
```
12  .github/workflows/publish-binaries.yml  vendored
```diff
@@ -1,3 +1,5 @@
+name: Publish binaries to GitHub release
+
 on:
   workflow_dispatch:
   schedule:
@@ -5,8 +7,6 @@ on:
   release:
     types: [published]
 
-name: Publish binaries to release
-
 jobs:
   check-version:
     name: Check the version validity
```
```diff
@@ -54,7 +54,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
       - name: Upload binaries to release
         if: github.event_name == 'release'
-        uses: svenstaro/upload-release-action@2.4.0
+        uses: svenstaro/upload-release-action@2.5.0
         with:
           repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
           file: target/release/meilisearch
@@ -87,7 +87,7 @@ jobs:
       # No need to upload binaries for dry run (cron)
      - name: Upload binaries to release
        if: github.event_name == 'release'
-       uses: svenstaro/upload-release-action@2.4.0
+       uses: svenstaro/upload-release-action@2.5.0
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/release/${{ matrix.artifact_name }}
@@ -123,7 +123,7 @@
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
-       uses: svenstaro/upload-release-action@2.4.0
+       uses: svenstaro/upload-release-action@2.5.0
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
@@ -183,7 +183,7 @@
      - name: Upload the binary to release
        # No need to upload binaries for dry run (cron)
        if: github.event_name == 'release'
-       uses: svenstaro/upload-release-action@2.4.0
+       uses: svenstaro/upload-release-action@2.5.0
        with:
          repo_token: ${{ secrets.MEILI_BOT_GH_PAT }}
          file: target/${{ matrix.target }}/release/meilisearch
```
5  .github/workflows/publish-docker-images.yml  vendored
```diff
@@ -1,4 +1,5 @@
 ---
+name: Publish images to Docker Hub
 
 on:
   push:
     # Will run for every tag pushed except `latest`
@@ -12,8 +13,6 @@ on:
     - cron: '0 23 * * *' # Every day at 11:00pm
   workflow_dispatch:
 
-name: Publish tagged images to Docker Hub
-
 jobs:
   docker:
     runs-on: docker
```
```diff
@@ -1,4 +1,4 @@
-name: Rust
+name: Test suite
 
 on:
   workflow_dispatch:
```
```diff
@@ -25,36 +25,35 @@ jobs:
       # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
       image: ubuntu:18.04
     steps:
-    - uses: actions/checkout@v3
-    - name: Install needed dependencies
-      run: |
-        apt-get update && apt-get install -y curl
-        apt-get install build-essential -y
-    - name: Run test with Rust stable
-      if: github.event_name != 'schedule'
-      uses: actions-rs/toolchain@v1
-      with:
-        toolchain: stable
-        override: true
-    - name: Run test with Rust nightly
-      if: github.event_name == 'schedule'
-      uses: actions-rs/toolchain@v1
-      with:
-        toolchain: nightly
-        override: true
-    # Disable cache due to disk space issues with Windows workers in CI
-    # - name: Cache dependencies
-    #   uses: Swatinem/rust-cache@v2.2.0
-    - name: Run cargo check without any default features
-      uses: actions-rs/cargo@v1
-      with:
-        command: build
-        args: --locked --release --no-default-features --all
-    - name: Run cargo test
-      uses: actions-rs/cargo@v1
-      with:
-        command: test
-        args: --locked --release --all
+    - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update && apt-get install -y curl
+        apt-get install build-essential -y
+    - name: Run test with Rust stable
+      if: github.event_name != 'schedule'
+      uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - name: Run test with Rust nightly
+      if: github.event_name == 'schedule'
+      uses: actions-rs/toolchain@v1
+      with:
+        toolchain: nightly
+        override: true
+    - name: Cache dependencies
+      uses: Swatinem/rust-cache@v2.2.0
+    - name: Run cargo check without any default features
+      uses: actions-rs/cargo@v1
+      with:
+        command: build
+        args: --locked --release --no-default-features --all
+    - name: Run cargo test
+      uses: actions-rs/cargo@v1
+      with:
+        command: test
+        args: --locked --release --all
 
   test-others:
     name: Tests on ${{ matrix.os }}
```
```diff
@@ -64,19 +63,47 @@ jobs:
       matrix:
         os: [macos-12, windows-2022]
     steps:
-    - uses: actions/checkout@v3
-    # - name: Cache dependencies
-    #   uses: Swatinem/rust-cache@v2.2.0
-    - name: Run cargo check without any default features
-      uses: actions-rs/cargo@v1
-      with:
-        command: build
-        args: --locked --release --no-default-features --all
-    - name: Run cargo test
-      uses: actions-rs/cargo@v1
-      with:
-        command: test
-        args: --locked --release --all
+    - uses: actions/checkout@v3
+    - name: Cache dependencies
+      uses: Swatinem/rust-cache@v2.2.0
+    - name: Run cargo check without any default features
+      uses: actions-rs/cargo@v1
+      with:
+        command: build
+        args: --locked --release --no-default-features --all
+    - name: Run cargo test
+      uses: actions-rs/cargo@v1
+      with:
+        command: test
+        args: --locked --release --all
+
+  test-all-features:
+    name: Tests all features on cron schedule only
+    runs-on: ubuntu-latest
+    container:
+      # Use ubuntu-18.04 to compile with glibc 2.27, which are the production expectations
+      image: ubuntu:18.04
+    if: github.event_name == 'schedule'
+    steps:
+    - uses: actions/checkout@v3
+    - name: Install needed dependencies
+      run: |
+        apt-get update
+        apt-get install --assume-yes build-essential curl
+    - uses: actions-rs/toolchain@v1
+      with:
+        toolchain: stable
+        override: true
+    - name: Run cargo build with all features
+      uses: actions-rs/cargo@v1
+      with:
+        command: build
+        args: --workspace --locked --release --all-features
+    - name: Run cargo test with all features
+      uses: actions-rs/cargo@v1
+      with:
+        command: test
+        args: --workspace --locked --release --all-features
 
   # We run tests in debug also, to make sure that the debug_assertions are hit
   test-debug:
```
```diff
@@ -95,8 +122,8 @@ jobs:
       with:
         toolchain: stable
         override: true
-    # - name: Cache dependencies
-    #   uses: Swatinem/rust-cache@v2.2.0
+    - name: Cache dependencies
+      uses: Swatinem/rust-cache@v2.2.0
     - name: Run tests in debug
       uses: actions-rs/cargo@v1
       with:
```
```diff
@@ -114,8 +141,8 @@ jobs:
         toolchain: 1.67.0
         override: true
         components: clippy
-    # - name: Cache dependencies
-    #   uses: Swatinem/rust-cache@v2.2.0
+    - name: Cache dependencies
+      uses: Swatinem/rust-cache@v2.2.0
     - name: Run cargo clippy
       uses: actions-rs/cargo@v1
       with:
```
```diff
@@ -134,8 +161,8 @@ jobs:
         toolchain: nightly
         override: true
         components: rustfmt
-    # - name: Cache dependencies
-    #   uses: Swatinem/rust-cache@v2.2.0
+    - name: Cache dependencies
+      uses: Swatinem/rust-cache@v2.2.0
     - name: Run cargo fmt
       # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
       # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
```
6  .github/workflows/uffizzi-build.yml  vendored
```diff
@@ -23,7 +23,7 @@ jobs:
           target: x86_64-unknown-linux-musl
 
       - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.2.0
+        uses: Swatinem/rust-cache@v2.2.1
 
       - name: Run cargo check without any default features
         uses: actions-rs/cargo@v1
```
```diff
@@ -46,14 +46,14 @@ jobs:
 
       - name: Docker metadata
         id: meta
-        uses: docker/metadata-action@v3
+        uses: docker/metadata-action@v4
         with:
           images: registry.uffizzi.com/${{ env.UUID_TAG }}
           tags: |
             type=raw,value=60d
 
       - name: Build Image
-        uses: docker/build-push-action@v3
+        uses: docker/build-push-action@v4
         with:
           context: ./
           file: .github/uffizzi/Dockerfile
```
```diff
@@ -17,7 +17,7 @@ tasks individally, but should be much faster since we are only performing
 one indexing operation.
 */
 
-use std::collections::{BTreeSet, HashSet};
+use std::collections::BTreeSet;
 use std::ffi::OsStr;
 use std::fs::{self, File};
 use std::io::BufWriter;
```
```diff
@@ -1278,19 +1278,56 @@ impl IndexScheduler {
         to_delete_tasks -= processing_tasks;
         to_delete_tasks -= enqueued_tasks;
 
         // 2. We now have a list of tasks to delete, delete them
+        let mut indexes = self.index_tasks.iter_mut(wtxn)?;
+        while let Some((key, bitmap)) = indexes.next().transpose()? {
+            let ret = &bitmap - &to_delete_tasks;
+            if bitmap.len() != ret.len() {
+                if ret.is_empty() {
+                    // safe because we don't own any references to the db
+                    unsafe {
+                        indexes.del_current()?;
+                    }
+                } else {
+                    let key = key.to_string();
+                    // safe because the key has been copied
+                    unsafe {
+                        indexes.put_current(&key, &ret)?;
+                    }
+                }
+            }
+        }
+        drop(indexes);
 
-        let mut affected_indexes = HashSet::new();
-        let mut affected_statuses = HashSet::new();
-        let mut affected_kinds = HashSet::new();
+        let mut statuses = self.status.iter_mut(wtxn)?;
+        while let Some((key, bitmap)) = statuses.next().transpose()? {
+            let ret = &bitmap - &to_delete_tasks;
+            if bitmap.len() != ret.len() {
+                // safe because the key has been copied
+                unsafe {
+                    statuses.put_current(&key, &ret)?;
+                }
+            }
+        }
+        drop(statuses);
+
+        let mut kinds = self.kind.iter_mut(wtxn)?;
+        while let Some((key, bitmap)) = kinds.next().transpose()? {
+            let ret = &bitmap - &to_delete_tasks;
+            if bitmap.len() != ret.len() {
+                // safe because the key has been copied
+                unsafe {
+                    kinds.put_current(&key, &ret)?;
+                }
+            }
+        }
+        drop(kinds);
+
+        // 2. We now have a list of tasks to delete, delete them
         let mut affected_canceled_by = RoaringBitmap::new();
 
         for task_id in to_delete_tasks.iter() {
             let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
 
-            affected_indexes.extend(task.indexes().into_iter().map(|x| x.to_owned()));
-            affected_statuses.insert(task.status);
-            affected_kinds.insert(task.kind.as_kind());
             // Note: don't delete the persisted task data since
             // we can only delete succeeded, failed, and canceled tasks.
             // In each of those cases, the persisted data is supposed to
```
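The loops above rely on RoaringBitmap set arithmetic to decide whether a database entry has to be rewritten or dropped. A minimal standalone sketch of that check, using the `roaring` crate directly (the helper name `remaining_tasks` is invented for illustration, it is not Meilisearch code):

```rust
// Sketch of the bitmap check used in the hunk above: `&a - &b` builds the set
// difference without mutating either operand, so comparing lengths tells us
// whether any of the to-be-deleted task ids were present in this entry.
use roaring::RoaringBitmap;

fn remaining_tasks(entry: &RoaringBitmap, to_delete: &RoaringBitmap) -> Option<RoaringBitmap> {
    let ret = entry - to_delete;
    if entry.len() != ret.len() {
        // Some ids were removed: the caller must rewrite (or delete) the entry.
        Some(ret)
    } else {
        // Nothing to update for this entry.
        None
    }
}

fn main() {
    let entry: RoaringBitmap = (0u32..10).collect();
    let to_delete: RoaringBitmap = [2u32, 3, 99].into_iter().collect();
    // 2 and 3 are removed, 99 was never there: 8 task ids remain.
    assert_eq!(remaining_tasks(&entry, &to_delete).unwrap().len(), 8);
}
```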
```diff
@@ -1307,18 +1344,6 @@ impl IndexScheduler {
             }
         }
 
-        for index in affected_indexes {
-            self.update_index(wtxn, &index, |bitmap| *bitmap -= &to_delete_tasks)?;
-        }
-
-        for status in affected_statuses {
-            self.update_status(wtxn, status, |bitmap| *bitmap -= &to_delete_tasks)?;
-        }
-
-        for kind in affected_kinds {
-            self.update_kind(wtxn, kind, |bitmap| *bitmap -= &to_delete_tasks)?;
-        }
-
         for task in to_delete_tasks.iter() {
             self.all_tasks.delete(wtxn, &BEU32::new(task))?;
         }
```
```diff
@@ -31,6 +31,7 @@ mod uuid_codec;
 pub type Result<T> = std::result::Result<T, Error>;
 pub type TaskId = u32;
 
+use std::collections::HashMap;
 use std::ops::{Bound, RangeBounds};
 use std::path::{Path, PathBuf};
 use std::sync::atomic::AtomicBool;
```
```diff
@@ -43,7 +44,7 @@ pub use error::Error;
 use file_store::FileStore;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
-use meilisearch_types::heed::{self, Database, Env, RoTxn};
+use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
 use meilisearch_types::milli;
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::update::IndexerConfig;
```
```diff
@@ -883,115 +884,8 @@ impl IndexScheduler {
 
     /// Register a new task coming from a dump in the scheduler.
     /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
-    pub fn register_dumped_task(
-        &mut self,
-        task: TaskDump,
-        content_file: Option<Box<UpdateFile>>,
-    ) -> Result<Task> {
-        // Currently we don't need to access the tasks queue while loading a dump thus I can block everything.
-        let mut wtxn = self.env.write_txn()?;
-
-        let content_uuid = match content_file {
-            Some(content_file) if task.status == Status::Enqueued => {
-                let (uuid, mut file) = self.create_update_file()?;
-                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
-                for doc in content_file {
-                    builder.append_json_object(&doc?)?;
-                }
-                builder.into_inner()?;
-                file.persist()?;
-
-                Some(uuid)
-            }
-            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
-            // in case we try to open it later.
-            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
-            _ => None,
-        };
-
-        let task = Task {
-            uid: task.uid,
-            enqueued_at: task.enqueued_at,
-            started_at: task.started_at,
-            finished_at: task.finished_at,
-            error: task.error,
-            canceled_by: task.canceled_by,
-            details: task.details,
-            status: task.status,
-            kind: match task.kind {
-                KindDump::DocumentImport {
-                    primary_key,
-                    method,
-                    documents_count,
-                    allow_index_creation,
-                } => KindWithContent::DocumentAdditionOrUpdate {
-                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                    primary_key,
-                    method,
-                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
-                    documents_count,
-                    allow_index_creation,
-                },
-                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
-                    documents_ids,
-                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                },
-                KindDump::DocumentClear => KindWithContent::DocumentClear {
-                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                },
-                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
-                    KindWithContent::SettingsUpdate {
-                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                        new_settings: settings,
-                        is_deletion,
-                        allow_index_creation,
-                    }
-                }
-                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
-                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                },
-                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
-                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                    primary_key,
-                },
-                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
-                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
-                    primary_key,
-                },
-                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
-                KindDump::TaskCancelation { query, tasks } => {
-                    KindWithContent::TaskCancelation { query, tasks }
-                }
-                KindDump::TasksDeletion { query, tasks } => {
-                    KindWithContent::TaskDeletion { query, tasks }
-                }
-                KindDump::DumpCreation { keys, instance_uid } => {
-                    KindWithContent::DumpCreation { keys, instance_uid }
-                }
-                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
-            },
-        };
-
-        self.all_tasks.put(&mut wtxn, &BEU32::new(task.uid), &task)?;
-
-        for index in task.indexes() {
-            self.update_index(&mut wtxn, index, |bitmap| {
-                bitmap.insert(task.uid);
-            })?;
-        }
-
-        self.update_status(&mut wtxn, task.status, |bitmap| {
-            bitmap.insert(task.uid);
-        })?;
-
-        self.update_kind(&mut wtxn, task.kind.as_kind(), |bitmap| {
-            (bitmap.insert(task.uid));
-        })?;
-
-        wtxn.commit()?;
-        self.wake_up.signal();
-
-        Ok(task)
+    pub fn register_dumped_task(&mut self) -> Result<Dump> {
+        Dump::new(self)
     }
 
     /// Create a new index without any associated task.
```
```diff
@@ -1218,6 +1112,156 @@ impl IndexScheduler {
         }
     }
 
+pub struct Dump<'a> {
+    index_scheduler: &'a IndexScheduler,
+    wtxn: RwTxn<'a, 'a>,
+
+    indexes: HashMap<String, RoaringBitmap>,
+    statuses: HashMap<Status, RoaringBitmap>,
+    kinds: HashMap<Kind, RoaringBitmap>,
+}
+
+impl<'a> Dump<'a> {
+    pub(crate) fn new(index_scheduler: &'a mut IndexScheduler) -> Result<Self> {
+        // While loading a dump no one should be able to access the scheduler thus I can block everything.
+        let wtxn = index_scheduler.env.write_txn()?;
+
+        Ok(Dump {
+            index_scheduler,
+            wtxn,
+            indexes: HashMap::new(),
+            statuses: HashMap::new(),
+            kinds: HashMap::new(),
+        })
+    }
+
+    /// Register a new task coming from a dump in the scheduler.
+    /// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
+    pub fn register_dumped_task(
+        &mut self,
+        task: TaskDump,
+        content_file: Option<Box<UpdateFile>>,
+    ) -> Result<Task> {
+        let content_uuid = match content_file {
+            Some(content_file) if task.status == Status::Enqueued => {
+                let (uuid, mut file) = self.index_scheduler.create_update_file()?;
+                let mut builder = DocumentsBatchBuilder::new(file.as_file_mut());
+                for doc in content_file {
+                    builder.append_json_object(&doc?)?;
+                }
+                builder.into_inner()?;
+                file.persist()?;
+
+                Some(uuid)
+            }
+            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
+            // in case we try to open it later.
+            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
+            _ => None,
+        };
+
+        let task = Task {
+            uid: task.uid,
+            enqueued_at: task.enqueued_at,
+            started_at: task.started_at,
+            finished_at: task.finished_at,
+            error: task.error,
+            canceled_by: task.canceled_by,
+            details: task.details,
+            status: task.status,
+            kind: match task.kind {
+                KindDump::DocumentImport {
+                    primary_key,
+                    method,
+                    documents_count,
+                    allow_index_creation,
+                } => KindWithContent::DocumentAdditionOrUpdate {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    primary_key,
+                    method,
+                    content_file: content_uuid.ok_or(Error::CorruptedDump)?,
+                    documents_count,
+                    allow_index_creation,
+                },
+                KindDump::DocumentDeletion { documents_ids } => KindWithContent::DocumentDeletion {
+                    documents_ids,
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                },
+                KindDump::DocumentClear => KindWithContent::DocumentClear {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                },
+                KindDump::Settings { settings, is_deletion, allow_index_creation } => {
+                    KindWithContent::SettingsUpdate {
+                        index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                        new_settings: settings,
+                        is_deletion,
+                        allow_index_creation,
+                    }
+                }
+                KindDump::IndexDeletion => KindWithContent::IndexDeletion {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                },
+                KindDump::IndexCreation { primary_key } => KindWithContent::IndexCreation {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    primary_key,
+                },
+                KindDump::IndexUpdate { primary_key } => KindWithContent::IndexUpdate {
+                    index_uid: task.index_uid.ok_or(Error::CorruptedDump)?,
+                    primary_key,
+                },
+                KindDump::IndexSwap { swaps } => KindWithContent::IndexSwap { swaps },
+                KindDump::TaskCancelation { query, tasks } => {
+                    KindWithContent::TaskCancelation { query, tasks }
+                }
+                KindDump::TasksDeletion { query, tasks } => {
+                    KindWithContent::TaskDeletion { query, tasks }
+                }
+                KindDump::DumpCreation { keys, instance_uid } => {
+                    KindWithContent::DumpCreation { keys, instance_uid }
+                }
+                KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
+            },
+        };
+
+        self.index_scheduler.all_tasks.put(&mut self.wtxn, &BEU32::new(task.uid), &task)?;
+
+        for index in task.indexes() {
+            match self.indexes.get_mut(index) {
+                Some(bitmap) => {
+                    bitmap.insert(task.uid);
+                }
+                None => {
+                    let mut bitmap = RoaringBitmap::new();
+                    bitmap.insert(task.uid);
+                    self.indexes.insert(index.to_string(), bitmap);
+                }
+            };
+        }
+        self.statuses.entry(task.status).or_insert(RoaringBitmap::new()).insert(task.uid);
+        self.kinds.entry(task.kind.as_kind()).or_insert(RoaringBitmap::new()).insert(task.uid);
+
+        Ok(task)
+    }
+
+    /// Commit all the changes and exit the importing dump state
+    pub fn finish(mut self) -> Result<()> {
+        for (index, bitmap) in self.indexes {
+            self.index_scheduler.index_tasks.put(&mut self.wtxn, &index, &bitmap)?;
+        }
+        for (status, bitmap) in self.statuses {
+            self.index_scheduler.put_status(&mut self.wtxn, status, &bitmap)?;
+        }
+        for (kind, bitmap) in self.kinds {
+            self.index_scheduler.put_kind(&mut self.wtxn, kind, &bitmap)?;
+        }
+
+        self.wtxn.commit()?;
+        self.index_scheduler.wake_up.signal();
+
+        Ok(())
+    }
+}
+
 /// The outcome of calling the [`IndexScheduler::tick`] function.
 pub enum TickOutcome {
     /// The scheduler should immediately attempt another `tick`.
```
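The new `Dump` struct accumulates the per-index, per-status and per-kind bitmaps in memory and only writes them to the database once, in `finish()`, instead of issuing one read-modify-write per registered task. A minimal sketch of that buffering pattern, with a toy `Buffer` type standing in for `Dump` (invented names, not Meilisearch code):

```rust
// Sketch: collect task ids into in-memory buckets, flush everything once.
use std::collections::HashMap;
use roaring::RoaringBitmap;

#[derive(Default)]
struct Buffer {
    statuses: HashMap<String, RoaringBitmap>,
}

impl Buffer {
    fn register(&mut self, status: &str, task_uid: u32) {
        // `entry(..).or_default()` is the same idea as the `entry(..).or_insert(..)`
        // calls in the diff above: create the bitmap lazily, then insert the id.
        self.statuses.entry(status.to_string()).or_default().insert(task_uid);
    }

    fn finish(self) -> Vec<(String, RoaringBitmap)> {
        // In the real code each (key, bitmap) pair is `put` into the database
        // here and the single write transaction is committed.
        self.statuses.into_iter().collect()
    }
}

fn main() {
    let mut buf = Buffer::default();
    buf.register("enqueued", 0);
    buf.register("enqueued", 1);
    buf.register("succeeded", 2);
    assert_eq!(buf.finish().len(), 2);
}
```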
```diff
@@ -367,12 +367,14 @@ fn import_dump(
         log::info!("All documents successfully imported.");
     }
 
+    let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;
+
     // 4. Import the tasks.
     for ret in dump_reader.tasks()? {
         let (task, file) = ret?;
-        index_scheduler.register_dumped_task(task, file)?;
+        index_scheduler_dump.register_dumped_task(task, file)?;
     }
-    Ok(())
+    Ok(index_scheduler_dump.finish()?)
 }
 
 pub fn configure_data(
```
```diff
@@ -279,6 +279,81 @@ async fn add_csv_document() {
     "###);
 }
 
+#[actix_rt::test]
+async fn add_csv_document_with_types() {
+    let server = Server::new().await;
+    let index = server.index("pets");
+
+    let document = "#id:number,name:string,race:string,age:number,cute:boolean
+0,jean,bernese mountain,2.5,true
+1,,,,
+2,lilou,pug,-2,false";
+
+    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "taskUid": 0,
+      "indexUid": "pets",
+      "status": "enqueued",
+      "type": "documentAdditionOrUpdate",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
+    {
+      "uid": 0,
+      "indexUid": "pets",
+      "status": "succeeded",
+      "type": "documentAdditionOrUpdate",
+      "canceledBy": null,
+      "details": {
+        "receivedDocuments": 3,
+        "indexedDocuments": 3
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(documents), @r###"
+    {
+      "results": [
+        {
+          "#id": 0,
+          "name": "jean",
+          "race": "bernese mountain",
+          "age": 2.5,
+          "cute": true
+        },
+        {
+          "#id": 1,
+          "name": null,
+          "race": null,
+          "age": null,
+          "cute": null
+        },
+        {
+          "#id": 2,
+          "name": "lilou",
+          "race": "pug",
+          "age": -2,
+          "cute": false
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 3
+    }
+    "###);
+}
+
 #[actix_rt::test]
 async fn add_csv_document_with_custom_delimiter() {
     let server = Server::new().await;
```
```diff
@@ -343,6 +418,40 @@ async fn add_csv_document_with_custom_delimiter() {
     "###);
 }
 
+#[actix_rt::test]
+async fn add_csv_document_with_types_error() {
+    let server = Server::new().await;
+    let index = server.index("pets");
+
+    let document = "#id:number,a:boolean,b:number
+0,doggo,1";
+
+    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "message": "The `csv` payload provided is malformed: `Error parsing boolean \"doggo\" at line 1: provided string was not `true` or `false``.",
+      "code": "malformed_payload",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#malformed_payload"
+    }
+    "###);
+
+    let document = "#id:number,a:boolean,b:number
+0,true,doggo";
+
+    let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "message": "The `csv` payload provided is malformed: `Error parsing number \"doggo\" at line 1: invalid float literal`.",
+      "code": "malformed_payload",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#malformed_payload"
+    }
+    "###);
+}
+
 /// any other content-type is must be refused
 #[actix_rt::test]
 async fn error_add_documents_test_bad_content_types() {
```
```diff
@@ -114,14 +115,15 @@ impl<W: Write> DocumentsBatchBuilder<W> {
                     self.value_buffer.clear();
 
                     let value = &record[*i];
+                    let trimmed_value = value.trim();
                     match type_ {
                         AllowedType::Number => {
-                            if value.trim().is_empty() {
+                            if trimmed_value.is_empty() {
                                 to_writer(&mut self.value_buffer, &Value::Null)?;
-                            } else if let Ok(integer) = value.trim().parse::<i64>() {
+                            } else if let Ok(integer) = trimmed_value.parse::<i64>() {
                                 to_writer(&mut self.value_buffer, &integer)?;
                             } else {
-                                match value.trim().parse::<f64>() {
+                                match trimmed_value.parse::<f64>() {
                                     Ok(float) => {
                                         to_writer(&mut self.value_buffer, &float)?;
                                     }
```
```diff
@@ -135,6 +136,24 @@ impl<W: Write> DocumentsBatchBuilder<W> {
                                 }
                             }
                         }
+                        AllowedType::Boolean => {
+                            if trimmed_value.is_empty() {
+                                to_writer(&mut self.value_buffer, &Value::Null)?;
+                            } else {
+                                match trimmed_value.parse::<bool>() {
+                                    Ok(bool) => {
+                                        to_writer(&mut self.value_buffer, &bool)?;
+                                    }
+                                    Err(error) => {
+                                        return Err(Error::ParseBool {
+                                            error,
+                                            line,
+                                            value: value.to_string(),
+                                        });
+                                    }
+                                }
+                            }
+                        }
                         AllowedType::String => {
                             if value.is_empty() {
                                 to_writer(&mut self.value_buffer, &Value::Null)?;
```
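A standalone sketch of the boolean-cell rule the new `AllowedType::Boolean` arm follows: an empty cell becomes JSON null, otherwise the trimmed value must parse as `true`/`false`, and any other string is reported with the offending value and line. `parse_boolean_cell` is an invented helper for illustration, not the Meilisearch API:

```rust
// Sketch of typed-CSV boolean parsing: empty -> null, otherwise str::parse::<bool>.
use serde_json::Value;

fn parse_boolean_cell(value: &str, line: usize) -> Result<Value, String> {
    let trimmed_value = value.trim();
    if trimmed_value.is_empty() {
        Ok(Value::Null)
    } else {
        trimmed_value
            .parse::<bool>()
            .map(Value::Bool)
            // std's ParseBoolError renders as "provided string was not `true` or `false`",
            // which is exactly the wording seen in the test snapshots above.
            .map_err(|error| format!("Error parsing boolean {value:?} at line {line}: {error}"))
    }
}

fn main() {
    assert_eq!(parse_boolean_cell("true", 1).unwrap(), Value::Bool(true));
    assert_eq!(parse_boolean_cell("  ", 1).unwrap(), Value::Null);
    assert!(parse_boolean_cell("doggo", 1).is_err());
}
```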
```diff
@@ -173,6 +192,7 @@ impl<W: Write> DocumentsBatchBuilder<W> {
 #[derive(Debug)]
 enum AllowedType {
     String,
+    Boolean,
     Number,
 }
```
```diff
@@ -181,6 +201,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
     match header.rsplit_once(':') {
         Some((field_name, field_type)) => match field_type {
             "string" => (field_name, AllowedType::String),
+            "boolean" => (field_name, AllowedType::Boolean),
             "number" => (field_name, AllowedType::Number),
             // if the pattern isn't reconized, we keep the whole field.
             _otherwise => (header, AllowedType::String),
```
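For reference, a self-contained sketch of the typed-header convention this hunk extends: a trailing `:string`, `:boolean` or `:number` selects the column type, anything else keeps the whole header as an untyped (string) field. This is a simplified re-implementation for illustration, not the exact Meilisearch function:

```rust
// Sketch of parsing "name:type" CSV headers with rsplit_once.
#[derive(Debug, PartialEq)]
enum AllowedType {
    String,
    Boolean,
    Number,
}

fn parse_csv_header(header: &str) -> (&str, AllowedType) {
    match header.rsplit_once(':') {
        Some((field_name, "string")) => (field_name, AllowedType::String),
        Some((field_name, "boolean")) => (field_name, AllowedType::Boolean),
        Some((field_name, "number")) => (field_name, AllowedType::Number),
        // If the suffix isn't recognized, keep the whole header as a string field.
        _ => (header, AllowedType::String),
    }
}

fn main() {
    assert_eq!(parse_csv_header("cute:boolean"), ("cute", AllowedType::Boolean));
    assert_eq!(parse_csv_header("name"), ("name", AllowedType::String));
}
```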
```diff
@@ -3,7 +3,7 @@ mod enriched;
 mod reader;
 mod serde_impl;
 
-use std::fmt::{self, Debug};
+use std::fmt::Debug;
 use std::io;
 use std::str::Utf8Error;
```
```diff
@@ -87,71 +87,30 @@ impl DocumentsBatchIndex {
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, thiserror::Error)]
 pub enum Error {
+    #[error("Error parsing number {value:?} at line {line}: {error}")]
     ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
+    #[error("Error parsing boolean {value:?} at line {line}: {error}")]
+    ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
+    #[error("Invalid document addition format, missing the documents batch index.")]
     InvalidDocumentFormat,
+    #[error("Invalid enriched data.")]
     InvalidEnrichedData,
-    InvalidUtf8(Utf8Error),
-    Csv(csv::Error),
-    Json(serde_json::Error),
+    #[error(transparent)]
+    InvalidUtf8(#[from] Utf8Error),
+    #[error(transparent)]
+    Csv(#[from] csv::Error),
+    #[error(transparent)]
+    Json(#[from] serde_json::Error),
+    #[error(transparent)]
     Serialize(serde_json::Error),
-    Grenad(grenad::Error),
-    Io(io::Error),
+    #[error(transparent)]
+    Grenad(#[from] grenad::Error),
+    #[error(transparent)]
+    Io(#[from] io::Error),
 }
 
-impl From<csv::Error> for Error {
-    fn from(e: csv::Error) -> Self {
-        Self::Csv(e)
-    }
-}
-
-impl From<io::Error> for Error {
-    fn from(other: io::Error) -> Self {
-        Self::Io(other)
-    }
-}
-
-impl From<serde_json::Error> for Error {
-    fn from(other: serde_json::Error) -> Self {
-        Self::Json(other)
-    }
-}
-
-impl From<grenad::Error> for Error {
-    fn from(other: grenad::Error) -> Self {
-        Self::Grenad(other)
-    }
-}
-
-impl From<Utf8Error> for Error {
-    fn from(other: Utf8Error) -> Self {
-        Self::InvalidUtf8(other)
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            Error::ParseFloat { error, line, value } => {
-                write!(f, "Error parsing number {:?} at line {}: {}", value, line, error)
-            }
-            Error::InvalidDocumentFormat => {
-                f.write_str("Invalid document addition format, missing the documents batch index.")
-            }
-            Error::InvalidEnrichedData => f.write_str("Invalid enriched data."),
-            Error::InvalidUtf8(e) => write!(f, "{}", e),
-            Error::Io(e) => write!(f, "{}", e),
-            Error::Serialize(e) => write!(f, "{}", e),
-            Error::Grenad(e) => write!(f, "{}", e),
-            Error::Csv(e) => write!(f, "{}", e),
-            Error::Json(e) => write!(f, "{}", e),
-        }
-    }
-}
-
-impl std::error::Error for Error {}
-
 #[cfg(test)]
 pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
     let documents = match json {
```
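The conversion above works because `thiserror` derives what the removed hand-written impls used to provide: `#[error(...)]` generates the `Display` implementation and `#[from]` generates the `From` conversions used by `?`. A small illustration with a toy error type (assumed example, not the real `Error`):

```rust
// Sketch: thiserror replaces manual Display/From/std::error::Error impls.
use std::io;

#[derive(Debug, thiserror::Error)]
enum MyError {
    // Field names are interpolated directly into the Display message.
    #[error("Error parsing number {value:?} at line {line}: {error}")]
    ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
    // `transparent` forwards Display/source to the wrapped error,
    // `#[from]` generates `impl From<io::Error> for MyError`.
    #[error(transparent)]
    Io(#[from] io::Error),
}

fn read(path: &str) -> Result<String, MyError> {
    // `?` works without a manual From impl thanks to `#[from]`.
    Ok(std::fs::read_to_string(path)?)
}

fn main() {
    let err: MyError = io::Error::new(io::ErrorKind::NotFound, "missing").into();
    println!("{err}"); // Display comes from the derive.
    let _ = read("does-not-exist.txt").is_err();
}
```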
```diff
@@ -274,6 +233,19 @@ mod test {
         ]);
     }
 
+    #[test]
+    fn csv_types_dont_panic() {
+        let csv1_content =
+            "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
+        let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));
+
+        let mut builder = DocumentsBatchBuilder::new(Vec::new());
+        builder.append_csv(csv1).unwrap();
+        let vector = builder.into_inner().unwrap();
+
+        DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
+    }
+
     #[test]
     fn out_of_order_csv_fields() {
         let csv1_content = "id:number,b\n1,0";
```
```diff
@@ -565,8 +565,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                     self.index.put_primary_key(self.wtxn, primary_key)?;
                     Ok(())
                 } else {
-                    let primary_key = self.index.primary_key(self.wtxn)?.unwrap();
-                    Err(UserError::PrimaryKeyCannotBeChanged(primary_key.to_string()).into())
+                    let curr_primary_key = self.index.primary_key(self.wtxn)?.unwrap().to_string();
+                    if primary_key == &curr_primary_key {
+                        Ok(())
+                    } else {
+                        Err(UserError::PrimaryKeyCannotBeChanged(curr_primary_key).into())
+                    }
                 }
             }
             Setting::Reset => {
```
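The behavioural change in this hunk: re-sending the index's current primary key is now accepted as a no-op, and only a different key (on an index that already has documents) is rejected. A simplified sketch of that rule with a toy function, not the real `Settings` code:

```rust
// Sketch of the new primary-key update rule.
fn check_primary_key_update(current: Option<&str>, requested: &str) -> Result<(), String> {
    match current {
        // No primary key yet (empty index in the real code): accept the new one.
        None => Ok(()),
        // Same key as before: nothing to do, not an error anymore.
        Some(curr) if curr == requested => Ok(()),
        // A different key on a non-empty index is still rejected.
        Some(curr) => Err(format!("Index already has a primary key: `{curr}`.")),
    }
}

fn main() {
    assert!(check_primary_key_update(None, "mykey").is_ok());
    assert!(check_primary_key_update(Some("mykey"), "mykey").is_ok());
    assert!(check_primary_key_update(Some("mykey"), "other").is_err());
}
```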
```diff
@@ -1332,6 +1336,17 @@ mod tests {
         .unwrap();
         wtxn.commit().unwrap();
 
+        // Updating settings with the same primary key should do nothing
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("mykey"));
+            })
+            .unwrap();
+        assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey"));
+        wtxn.commit().unwrap();
+
         // Updating the settings with a different (or no) primary key causes an error
         let mut wtxn = index.write_txn().unwrap();
         let error = index
             .update_settings_using_wtxn(&mut wtxn, |settings| {
```