Add a puffin profiling log in the sorter_into_reader function

Add more puffin logs to the deletion functions
Temporarily use the charabia git repository to get the latest fixes
2025-12-04 11:45:44 +00:00 · 2023-09-20 16:31:14 +02:00 · 2023-09-20 11:13:29 +02:00 · 2023-09-19 10:15:17 +02:00 · 2023-09-18 18:12:16 +02:00 · 2023-09-18 18:11:12 +02:00
63 changed files with 953 additions and 1259 deletions
--- a/.github/workflows/publish-apt-brew-pkg.yml
+++ b/.github/workflows/publish-apt-brew-pkg.yml
@@ -53,5 +53,6 @@ jobs:
        uses: mislav/bump-homebrew-formula-action@v2
        with:
          formula-name: meilisearch
+          formula-path: Formula/m/meilisearch.rb
        env:
          COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -43,7 +43,7 @@ jobs:
          toolchain: nightly
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.5.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@@ -65,7 +65,7 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.5.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@@ -146,7 +146,7 @@ jobs:
          toolchain: stable
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.5.1
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@@ -165,7 +165,7 @@ jobs:
          override: true
          components: clippy
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.5.1
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
@@ -184,7 +184,7 @@ jobs:
          override: true
          components: rustfmt
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.5.1
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -700,8 +700,7 @@ dependencies = [
 [[package]]
 name = "charabia"
 version = "0.8.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "098219a776307414866165a03a9cc68c1578764fe3616fe979e1c280790ddd73"
+source = "git+https://github.com/meilisearch/charabia?branch=main#5c3d09a7127dcf5e0e5d94d991c4d3d5ef4768cc"
 dependencies = [
 "aho-corasick",
 "cow-utils",
@@ -1444,6 +1443,7 @@ dependencies = [
 "insta",
 "nom",
 "nom_locate",
+ "unescaper",
 ]

 [[package]]
@@ -2176,9 +2176,9 @@ dependencies = [

 [[package]]
 name = "lindera-cc-cedict-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d2e8f2ca97ddf952fe340642511b9c14b373cb2eef711d526bb8ef2ca0969b8"
+checksum = "6f567a47e47b5420908424de2c6c5e424e3cafe588d0146bd128c0f3755758a3"
 dependencies = [
 "anyhow",
 "bincode",
@@ -2195,9 +2195,9 @@ dependencies = [

 [[package]]
 name = "lindera-compress"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f72b460559bcbe8a9cee85ea4a5056133ed3abf373031191589236e656d65b59"
+checksum = "49f3e553d55ebe9881fa5e5de588b0a153456e93564d17dfbef498912caf63a2"
 dependencies = [
 "anyhow",
 "flate2",
@@ -2206,9 +2206,9 @@ dependencies = [

 [[package]]
 name = "lindera-core"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f586eb8a9393c32d5525e0e9336a3727bd1329674740097126f3b0bff8a1a1ea"
+checksum = "a9a2440cc156a4a911a174ec68203543d1efb10df3a700a59b6bf581e453c726"
 dependencies = [
 "anyhow",
 "bincode",
@@ -2223,9 +2223,9 @@ dependencies = [

 [[package]]
 name = "lindera-decompress"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1fb1facd8da698072fcc7338bd757730db53d59f313f44dd583fa03681dcc0e1"
+checksum = "e077a410e61c962cb526f71b7effd62ffc607488a8f61869c937582d2ccb529b"
 dependencies = [
 "anyhow",
 "flate2",
@@ -2234,9 +2234,9 @@ dependencies = [

 [[package]]
 name = "lindera-dictionary"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec7be7410b1da7017a8948986b87af67082f605e9a716f0989790d795d677f0c"
+checksum = "d9f57491adf7b311a3ee87f5e4a36454df16a2ec73de4ef28b2106fac80bd782"
 dependencies = [
 "anyhow",
 "bincode",
@@ -2254,9 +2254,9 @@ dependencies = [

 [[package]]
 name = "lindera-ipadic-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "705d07f8a45d04fd95149f7ad41a26d1f9e56c9c00402be6f9dd05e3d88b99c6"
+checksum = "a3476ec7748aebd2eb23d496ddfce5e7e0a5c031cffcd214451043e02d029f11"
 dependencies = [
 "anyhow",
 "bincode",
@@ -2275,9 +2275,9 @@ dependencies = [

 [[package]]
 name = "lindera-ipadic-neologd-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "633a93983ba13fba42328311a501091bd4a7aff0c94ae9eaa9d4733dd2b0468a"
+checksum = "7b1c7576a02d5e4af2bf62de51790a01bc4b8bc0d0b6a6b86a46b157f5cb306d"
 dependencies = [
 "anyhow",
 "bincode",
@@ -2296,9 +2296,9 @@ dependencies = [

 [[package]]
 name = "lindera-ko-dic"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a428e0d316b6c86f51bd919479692bc41ad840dba266ebc044663970f431ea18"
+checksum = "b713ecd5b827d7d448c3c5eb3c6d5899ecaf22cd17087599996349a02c76828d"
 dependencies = [
 "bincode",
 "byteorder",
@@ -2313,9 +2313,9 @@ dependencies = [

 [[package]]
 name = "lindera-ko-dic-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a5288704c6b8a069c0a1705c38758e836497698b50453373ab3d56c6f9a7ef8"
+checksum = "3e545752f6487be87b572529ad594cb3b48d2ef20821516f598b2d152d23277b"
 dependencies = [
 "anyhow",
 "bincode",
@@ -2333,9 +2333,9 @@ dependencies = [

 [[package]]
 name = "lindera-tokenizer"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "106ba439b2e87529d9bbedbb88d69f635baba1195c26502b308f55a85885fc81"
+checksum = "24a2d4606a5a4da62ac4a3680ee884a75da7f0c892dc967fc9cb983ceba39a8f"
 dependencies = [
 "bincode",
 "byteorder",
@@ -2348,9 +2348,9 @@ dependencies = [

 [[package]]
 name = "lindera-unidic"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3399b6dcfe1701333451d184ff3c677f433b320153427b146360c9e4bd8cb816"
+checksum = "388b1bdf81794b5d5b8057ce0321c58ff4b90d676b637948ccc7863ae2f43d28"
 dependencies = [
 "bincode",
 "byteorder",
@@ -2365,9 +2365,9 @@ dependencies = [

 [[package]]
 name = "lindera-unidic-builder"
-version = "0.27.0"
+version = "0.27.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b698227fdaeac32289173ab389b990d4eb00a40cbc9912020f69a0c491dabf55"
+checksum = "cdfa3e29a22c047da57fadd960ff674b720de15a1e2fb35b5ed67f3408afb469"
 dependencies = [
 "anyhow",
 "bincode",
@@ -4180,6 +4180,15 @@ version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9"

+[[package]]
+name = "unescaper"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a96a44ae11e25afb520af4534fd7b0bd8cd613e35a78def813b8cf41631fa3c8"
+dependencies = [
+ "thiserror",
+]
+
 [[package]]
 name = "unicase"
 version = "2.6.0"
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@@ -7,7 +7,7 @@ use meilisearch_types::milli::update::IndexDocumentsMethod;
 use meilisearch_types::settings::Unchecked;
 use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
 use meilisearch_types::InstanceUid;
-use roaring::RoaringTreemap;
+use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;

@@ -121,11 +121,11 @@ pub enum KindDump {
    },
    TaskCancelation {
        query: String,
-        tasks: RoaringTreemap,
+        tasks: RoaringBitmap,
    },
    TasksDeletion {
        query: String,
-        tasks: RoaringTreemap,
+        tasks: RoaringBitmap,
    },
    DumpCreation {
        keys: Vec<Key>,
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@@ -69,7 +69,7 @@ impl CompatV5ToV6 {
                }

                let task = v6::Task {
-                    uid: task_view.uid as u64,
+                    uid: task_view.uid,
                    index_uid: task_view.index_uid,
                    status: match task_view.status {
                        v5::Status::Enqueued => v6::Status::Enqueued,
--- a/filter-parser/Cargo.toml
+++ b/filter-parser/Cargo.toml
@@ -14,6 +14,7 @@ license.workspace = true
 [dependencies]
 nom = "7.1.3"
 nom_locate = "4.1.0"
+unescaper = "0.1.2"

 [dev-dependencies]
 insta = "1.29.0"
--- a/filter-parser/src/error.rs
+++ b/filter-parser/src/error.rs
@@ -62,6 +62,7 @@ pub enum ErrorKind<'a> {
    MisusedGeoRadius,
    MisusedGeoBoundingBox,
    InvalidPrimary,
+    InvalidEscapedNumber,
    ExpectedEof,
    ExpectedValue(ExpectedValueKind),
    MalformedValue,
@@ -147,6 +148,9 @@ impl<'a> Display for Error<'a> {
                let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
            }
+            ErrorKind::InvalidEscapedNumber => {
+                writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
+            }
            ErrorKind::ExpectedEof => {
                writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
            }
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -545,6 +545,8 @@ impl<'a> std::fmt::Display for Token<'a> {

 #[cfg(test)]
 pub mod tests {
+    use FilterCondition as Fc;
+
    use super::*;

    /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
@@ -556,14 +558,22 @@ pub mod tests {
        unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
    }

+    fn p(s: &str) -> impl std::fmt::Display + '_ {
+        Fc::parse(s).unwrap().unwrap()
+    }
+
+    #[test]
+    fn parse_escaped() {
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
+        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
+        // but it also works with other sequences
+        insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
+    }
+
    #[test]
    fn parse() {
-        use FilterCondition as Fc;
-
-        fn p(s: &str) -> impl std::fmt::Display + '_ {
-            Fc::parse(s).unwrap().unwrap()
-        }
-
        // Test equal
        insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
        insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -171,7 +171,24 @@ pub fn parse_value(input: Span) -> IResult<Token> {
        })
    })?;

-    Ok((input, value))
+    match unescaper::unescape(value.value()) {
+        Ok(content) => {
+            if content.len() != value.value().len() {
+                Ok((input, Token::new(value.original_span(), Some(content))))
+            } else {
+                Ok((input, value))
+            }
+        }
+        Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
+        Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::InvalidEscapedNumber,
+        ))),
+        Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::MalformedValue,
+        ))),
+    }
 }

 fn is_value_component(c: char) -> bool {
@@ -318,17 +335,17 @@ pub mod test {
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
-            (r#""\\""#, r#"\\"#, false),
-            (r#""\\\\\\""#, r#"\\\\\\"#, false),
-            (r#""aa\\aa""#, r#"aa\\aa"#, false),
+            (r#""\\""#, r#"\"#, true),
+            (r#""\\\\\\""#, r#"\\\"#, true),
+            (r#""aa\\aa""#, r#"aa\aa"#, true),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
-            (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
+            (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
-            (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
+            (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
            (r#"'\'\''"#, r#"''"#, true),
        ];
@@ -350,7 +367,14 @@ pub mod test {
                "Filter `{}` was not supposed to be escaped",
                input
            );
-            assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
+            assert_eq!(
+                token.value(),
+                expected,
+                "Filter `{}` failed by giving `{}` instead of `{}`.",
+                input,
+                token.value(),
+                expected
+            );
        }
    }

--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -32,11 +32,11 @@ use meilisearch_types::milli::update::{
    DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
    Settings as MilliSettings,
 };
-use meilisearch_types::milli::{self, Filter, BEU64};
+use meilisearch_types::milli::{self, Filter, BEU32};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
 use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
-use roaring::RoaringTreemap;
+use roaring::RoaringBitmap;
 use time::macros::format_description;
 use time::OffsetDateTime;
 use uuid::Uuid;
@@ -58,7 +58,7 @@ pub(crate) enum Batch {
        /// The date and time at which the previously processing tasks started.
        previous_started_at: OffsetDateTime,
        /// The list of tasks that were processing when this task cancelation appeared.
-        previous_processing_tasks: RoaringTreemap,
+        previous_processing_tasks: RoaringBitmap,
    },
    TaskDeletion(Task),
    SnapshotCreation(Vec<Task>),
@@ -67,10 +67,6 @@ pub(crate) enum Batch {
        op: IndexOperation,
        must_create_index: bool,
    },
-    IndexDocumentDeletionByFilter {
-        index_uid: String,
-        task: Task,
-    },
    IndexCreation {
        index_uid: String,
        primary_key: Option<String>,
@@ -114,6 +110,10 @@ pub(crate) enum IndexOperation {
        documents: Vec<Vec<String>>,
        tasks: Vec<Task>,
    },
+    IndexDocumentDeletionByFilter {
+        index_uid: String,
+        task: Task,
+    },
    DocumentClear {
        index_uid: String,
        tasks: Vec<Task>,
@@ -155,7 +155,6 @@ impl Batch {
            | Batch::TaskDeletion(task)
            | Batch::Dump(task)
            | Batch::IndexCreation { task, .. }
-            | Batch::IndexDocumentDeletionByFilter { task, .. }
            | Batch::IndexUpdate { task, .. } => vec![task.uid],
            Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
                tasks.iter().map(|task| task.uid).collect()
@@ -167,6 +166,7 @@ impl Batch {
                | IndexOperation::DocumentClear { tasks, .. } => {
                    tasks.iter().map(|task| task.uid).collect()
                }
+                IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
                IndexOperation::SettingsAndDocumentOperation {
                    document_import_tasks: tasks,
                    settings_tasks: other,
@@ -194,8 +194,7 @@ impl Batch {
            IndexOperation { op, .. } => Some(op.index_uid()),
            IndexCreation { index_uid, .. }
            | IndexUpdate { index_uid, .. }
-            | IndexDeletion { index_uid, .. }
-            | IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
+            | IndexDeletion { index_uid, .. } => Some(index_uid),
        }
    }
 }
@@ -205,6 +204,7 @@ impl IndexOperation {
        match self {
            IndexOperation::DocumentOperation { index_uid, .. }
            | IndexOperation::DocumentDeletion { index_uid, .. }
+            | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
            | IndexOperation::DocumentClear { index_uid, .. }
            | IndexOperation::Settings { index_uid, .. }
            | IndexOperation::DocumentClearAndSetting { index_uid, .. }
@@ -239,9 +239,12 @@ impl IndexScheduler {
                let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
                match &task.kind {
                    KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
-                        Ok(Some(Batch::IndexDocumentDeletionByFilter {
-                            index_uid: index_uid.clone(),
-                            task,
+                        Ok(Some(Batch::IndexOperation {
+                            op: IndexOperation::IndexDocumentDeletionByFilter {
+                                index_uid: index_uid.clone(),
+                                task,
+                            },
+                            must_create_index: false,
                        }))
                    }
                    _ => unreachable!(),
@@ -536,7 +539,9 @@ impl IndexScheduler {
        let index_tasks = self.index_tasks(rtxn, index_name)? & enqueued;

        // If autobatching is disabled we only take one task at a time.
-        let tasks_limit = if self.autobatching_enabled { usize::MAX } else { 1 };
+        // Otherwise, we take only a maximum of tasks to create batches.
+        let tasks_limit =
+            if self.autobatching_enabled { self.maximum_number_of_batched_tasks } else { 1 };

        let enqueued = index_tasks
            .into_iter()
@@ -896,51 +901,6 @@ impl IndexScheduler {

                Ok(tasks)
            }
-            Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
-                let (index_uid, filter) =
-                    if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
-                        &task.kind
-                    {
-                        (index_uid, filter_expr)
-                    } else {
-                        unreachable!()
-                    };
-                let index = {
-                    let rtxn = self.env.read_txn()?;
-                    self.index_mapper.index(&rtxn, index_uid)?
-                };
-                let deleted_documents = delete_document_by_filter(filter, index);
-                let original_filter = if let Some(Details::DocumentDeletionByFilter {
-                    original_filter,
-                    deleted_documents: _,
-                }) = task.details
-                {
-                    original_filter
-                } else {
-                    // In the case of a `documentDeleteByFilter` the details MUST be set
-                    unreachable!();
-                };
-
-                match deleted_documents {
-                    Ok(deleted_documents) => {
-                        task.status = Status::Succeeded;
-                        task.details = Some(Details::DocumentDeletionByFilter {
-                            original_filter,
-                            deleted_documents: Some(deleted_documents),
-                        });
-                    }
-                    Err(e) => {
-                        task.status = Status::Failed;
-                        task.details = Some(Details::DocumentDeletionByFilter {
-                            original_filter,
-                            deleted_documents: Some(0),
-                        });
-                        task.error = Some(e.into());
-                    }
-                }
-
-                Ok(vec![task])
-            }
            Batch::IndexCreation { index_uid, primary_key, task } => {
                let wtxn = self.env.write_txn()?;
                if self.index_mapper.exists(&wtxn, &index_uid)? {
@@ -1065,13 +1025,7 @@ impl IndexScheduler {
    }

    /// Swap the index `lhs` with the index `rhs`.
-    fn apply_index_swap(
-        &self,
-        wtxn: &mut RwTxn,
-        task_id: TaskId,
-        lhs: &str,
-        rhs: &str,
-    ) -> Result<()> {
+    fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
        // 1. Verify that both lhs and rhs are existing indexes
        let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
        if !index_lhs_exists {
@@ -1092,7 +1046,7 @@ impl IndexScheduler {
        for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
            let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
            swap_index_uid_in_task(&mut task, (lhs, rhs));
-            self.all_tasks.put(wtxn, &BEU64::new(task_id), &task)?;
+            self.all_tasks.put(wtxn, &BEU32::new(task_id), &task)?;
        }

        // 4. remove the task from indexuid = before_name
@@ -1305,6 +1259,47 @@ impl IndexScheduler {

                Ok(tasks)
            }
+            IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
+                let filter =
+                    if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
+                        &task.kind
+                    {
+                        filter_expr
+                    } else {
+                        unreachable!()
+                    };
+                let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
+                let original_filter = if let Some(Details::DocumentDeletionByFilter {
+                    original_filter,
+                    deleted_documents: _,
+                }) = task.details
+                {
+                    original_filter
+                } else {
+                    // In the case of a `documentDeleteByFilter` the details MUST be set
+                    unreachable!();
+                };
+
+                match deleted_documents {
+                    Ok(deleted_documents) => {
+                        task.status = Status::Succeeded;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(deleted_documents),
+                        });
+                    }
+                    Err(e) => {
+                        task.status = Status::Failed;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(0),
+                        });
+                        task.error = Some(e.into());
+                    }
+                }
+
+                Ok(vec![task])
+            }
            IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
                let indexer_config = self.index_mapper.indexer_config();
                let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@@ -1395,11 +1390,7 @@ impl IndexScheduler {
    /// Delete each given task from all the databases (if it is deleteable).
    ///
    /// Return the number of tasks that were actually deleted.
-    fn delete_matched_tasks(
-        &self,
-        wtxn: &mut RwTxn,
-        matched_tasks: &RoaringTreemap,
-    ) -> Result<u64> {
+    fn delete_matched_tasks(&self, wtxn: &mut RwTxn, matched_tasks: &RoaringBitmap) -> Result<u64> {
        // 1. Remove from this list the tasks that we are not allowed to delete
        let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
        let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
@@ -1414,7 +1405,7 @@ impl IndexScheduler {
        let mut affected_indexes = HashSet::new();
        let mut affected_statuses = HashSet::new();
        let mut affected_kinds = HashSet::new();
-        let mut affected_canceled_by = RoaringTreemap::new();
+        let mut affected_canceled_by = RoaringBitmap::new();

        for task_id in to_delete_tasks.iter() {
            let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@@ -1451,10 +1442,10 @@ impl IndexScheduler {
        }

        for task in to_delete_tasks.iter() {
-            self.all_tasks.delete(wtxn, &BEU64::new(task))?;
+            self.all_tasks.delete(wtxn, &BEU32::new(task))?;
        }
        for canceled_by in affected_canceled_by {
-            let canceled_by = BEU64::new(canceled_by);
+            let canceled_by = BEU32::new(canceled_by);
            if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
                tasks -= &to_delete_tasks;
                if tasks.is_empty() {
@@ -1475,9 +1466,9 @@ impl IndexScheduler {
        &self,
        wtxn: &mut RwTxn,
        cancel_task_id: TaskId,
-        matched_tasks: &RoaringTreemap,
+        matched_tasks: &RoaringBitmap,
        previous_started_at: OffsetDateTime,
-        previous_processing_tasks: &RoaringTreemap,
+        previous_processing_tasks: &RoaringBitmap,
    ) -> Result<Vec<Uuid>> {
        let now = OffsetDateTime::now_utc();

@@ -1502,29 +1493,28 @@ impl IndexScheduler {
            task.details = task.details.map(|d| d.to_failed());
            self.update_task(wtxn, &task)?;
        }
-        self.canceled_by.put(wtxn, &BEU64::new(cancel_task_id), &tasks_to_cancel)?;
+        self.canceled_by.put(wtxn, &BEU32::new(cancel_task_id), &tasks_to_cancel)?;

        Ok(content_files_to_delete)
    }
 }

-fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
+fn delete_document_by_filter<'a>(
+    wtxn: &mut RwTxn<'a, '_>,
+    filter: &serde_json::Value,
+    index: &'a Index,
+) -> Result<u64> {
    let filter = Filter::from_json(filter)?;
    Ok(if let Some(filter) = filter {
-        let mut wtxn = index.write_txn()?;
-
-        let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
+        let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
            milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
                Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
            }
            e => e.into(),
        })?;
-        let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
+        let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
        delete_operation.delete_documents(&candidates);
-        let deleted_documents =
-            delete_operation.execute().map(|result| result.deleted_documents)?;
-        wtxn.commit()?;
-        deleted_documents
+        delete_operation.execute().map(|result| result.deleted_documents)?
    } else {
        0
    })
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@@ -48,8 +48,6 @@ impl From<DateField> for Code {
 pub enum Error {
    #[error("{1}")]
    WithCustomErrorCode(Code, Box<Self>),
-    #[error("Received bad task id: {received} should be >= to {expected}.")]
-    BadTaskId { received: TaskId, expected: TaskId },
    #[error("Index `{0}` not found.")]
    IndexNotFound(String),
    #[error("Index `{0}` already exists.")]
@@ -161,7 +159,6 @@ impl Error {
        match self {
            Error::IndexNotFound(_)
            | Error::WithCustomErrorCode(_, _)
-            | Error::BadTaskId { .. }
            | Error::IndexAlreadyExists(_)
            | Error::SwapDuplicateIndexFound(_)
            | Error::SwapDuplicateIndexesFound(_)
@@ -205,7 +202,6 @@ impl ErrorCode for Error {
    fn error_code(&self) -> Code {
        match self {
            Error::WithCustomErrorCode(code, _) => *code,
-            Error::BadTaskId { .. } => Code::BadRequest,
            Error::IndexNotFound(_) => Code::IndexNotFound,
            Error::IndexAlreadyExists(_) => Code::IndexAlreadyExists,
            Error::SwapDuplicateIndexesFound(_) => Code::InvalidSwapDuplicateIndexFound,
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -3,10 +3,9 @@ use std::fmt::Write;

 use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
 use meilisearch_types::heed::{Database, RoTxn};
-use meilisearch_types::milli::heed_codec::{CboRoaringTreemapCodec, RoaringTreemapCodec};
-use meilisearch_types::milli::BEU64;
+use meilisearch_types::milli::{CboRoaringBitmapCodec, RoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Details, Task};
-use roaring::RoaringTreemap;
+use roaring::RoaringBitmap;

 use crate::index_mapper::IndexMapper;
 use crate::{IndexScheduler, Kind, Status, BEI128};
@@ -16,6 +15,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {

    let IndexScheduler {
        autobatching_enabled,
+        maximum_number_of_batched_tasks: _,
        must_stop_processing: _,
        processing_tasks,
        file_store,
@@ -48,7 +48,7 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
    let processing_tasks = processing_tasks.read().unwrap().processing.clone();
    snap.push_str(&format!("### Autobatching Enabled = {autobatching_enabled}\n"));
    snap.push_str("### Processing Tasks:\n");
-    snap.push_str(&snapshot_treemap(&processing_tasks));
+    snap.push_str(&snapshot_bitmap(&processing_tasks));
    snap.push_str("\n----------------------------------------------------------------------\n");

    snap.push_str("### All Tasks:\n");
@@ -104,7 +104,7 @@ pub fn snapshot_file_store(file_store: &file_store::FileStore) -> String {
    snap
 }

-pub fn snapshot_treemap(r: &RoaringTreemap) -> String {
+pub fn snapshot_bitmap(r: &RoaringBitmap) -> String {
    let mut snap = String::new();
    snap.push('[');
    for x in r {
@@ -114,7 +114,7 @@ pub fn snapshot_treemap(r: &RoaringTreemap) -> String {
    snap
 }

-pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU64>, SerdeJson<Task>>) -> String {
+pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU32>, SerdeJson<Task>>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
@@ -126,13 +126,13 @@ pub fn snapshot_all_tasks(rtxn: &RoTxn, db: Database<OwnedType<BEU64>, SerdeJson

 pub fn snapshot_date_db(
    rtxn: &RoTxn,
-    db: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
+    db: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
 ) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
        let (_timestamp, task_ids) = next.unwrap();
-        snap.push_str(&format!("[timestamp] {}\n", snapshot_treemap(&task_ids)));
+        snap.push_str(&format!("[timestamp] {}\n", snapshot_bitmap(&task_ids)));
    }
    snap
 }
@@ -217,48 +217,45 @@ fn snapshot_details(d: &Details) -> String {

 pub fn snapshot_status(
    rtxn: &RoTxn,
-    db: Database<SerdeBincode<Status>, RoaringTreemapCodec>,
+    db: Database<SerdeBincode<Status>, RoaringBitmapCodec>,
 ) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
        let (status, task_ids) = next.unwrap();
-        writeln!(snap, "{status} {}", snapshot_treemap(&task_ids)).unwrap();
+        writeln!(snap, "{status} {}", snapshot_bitmap(&task_ids)).unwrap();
    }
    snap
 }
-pub fn snapshot_kind(
-    rtxn: &RoTxn,
-    db: Database<SerdeBincode<Kind>, RoaringTreemapCodec>,
-) -> String {
+pub fn snapshot_kind(rtxn: &RoTxn, db: Database<SerdeBincode<Kind>, RoaringBitmapCodec>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
        let (kind, task_ids) = next.unwrap();
        let kind = serde_json::to_string(&kind).unwrap();
-        writeln!(snap, "{kind} {}", snapshot_treemap(&task_ids)).unwrap();
+        writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap();
    }
    snap
 }

-pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringTreemapCodec>) -> String {
+pub fn snapshot_index_tasks(rtxn: &RoTxn, db: Database<Str, RoaringBitmapCodec>) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
        let (index, task_ids) = next.unwrap();
-        writeln!(snap, "{index} {}", snapshot_treemap(&task_ids)).unwrap();
+        writeln!(snap, "{index} {}", snapshot_bitmap(&task_ids)).unwrap();
    }
    snap
 }
 pub fn snapshot_canceled_by(
    rtxn: &RoTxn,
-    db: Database<OwnedType<BEU64>, RoaringTreemapCodec>,
+    db: Database<OwnedType<BEU32>, RoaringBitmapCodec>,
 ) -> String {
    let mut snap = String::new();
    let iter = db.iter(rtxn).unwrap();
    for next in iter {
        let (kind, task_ids) = next.unwrap();
-        writeln!(snap, "{kind} {}", snapshot_treemap(&task_ids)).unwrap();
+        writeln!(snap, "{kind} {}", snapshot_bitmap(&task_ids)).unwrap();
    }
    snap
 }
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
--- a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/cancel_processed.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
-1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
+1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_enqueued_task/initial_tasks_enqueued.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
-1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
+1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,1,]
--- a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/aborted_indexation.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
-3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
+3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [1,2,3,]
--- a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/cancel_processed.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 1 {uid: 1, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: canceled, canceled_by: 3, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
-3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
+3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(2), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_mix_of_tasks/processing_second_task_cancel_enqueued.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "beavero", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "wolfo", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }}
-3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
+3 {uid: 3, status: enqueued, details: { matched_tasks: 3, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [1,2,3,]
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/aborted_indexation.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
-1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
+1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,1,]
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_processed.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, status: canceled, canceled_by: 1, details: { received_documents: 1, indexed_documents: Some(0) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
-1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
+1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(1), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_task/cancel_task_registered.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
-1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
+1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,1,]
--- a/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_succeeded_task/cancel_processed.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
-1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0]> }}
+1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap
+++ b/index-scheduler/src/snapshots/lib.rs/query_tasks_canceled_by/start.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 1 {uid: 1, status: canceled, canceled_by: 3, details: { primary_key: Some("sheep") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("sheep") }}
 2 {uid: 2, status: canceled, canceled_by: 3, details: { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }, kind: IndexSwap { swaps: [IndexSwap { indexes: ("catto", "doggo") }] }}
-3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringTreemap<[0, 1, 2]> }}
+3 {uid: 3, status: succeeded, details: { matched_tasks: 3, canceled_tasks: Some(0), original_filter: "test_query" }, kind: TaskCancelation { query: "test_query", tasks: RoaringBitmap<[0, 1, 2]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_delete_same_task_twice/task_deletion_processed.snap
@@ -7,8 +7,8 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
-2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
-3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
+2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
+3 {uid: 3, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [1,]
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/after_registering_the_task_deletion.snap
@@ -8,7 +8,7 @@ source: index-scheduler/src/lib.rs
 ### All Tasks:
 0 {uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
-2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
+2 {uid: 2, status: enqueued, details: { matched_tasks: 1, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [1,2,]
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_deleteable/task_deletion_processed.snap
@@ -7,7 +7,7 @@ source: index-scheduler/src/lib.rs
 ----------------------------------------------------------------------
 ### All Tasks:
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
-2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0]> }}
+2 {uid: 2, status: succeeded, details: { matched_tasks: 1, deleted_tasks: Some(1), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [1,]
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_done.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
-3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
+3 {uid: 3, status: succeeded, details: { matched_tasks: 2, deleted_tasks: Some(0), original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,1,2,]
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_enqueued.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
-3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
+3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,1,2,3,]
--- a/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap
+++ b/index-scheduler/src/snapshots/lib.rs/task_deletion_undeleteable/task_deletion_processing.snap
@@ -9,7 +9,7 @@ source: index-scheduler/src/lib.rs
 0 {uid: 0, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "catto", primary_key: None, method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggo", primary_key: Some("bone"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
-3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringTreemap<[0, 1]> }}
+3 {uid: 3, status: enqueued, details: { matched_tasks: 2, deleted_tasks: None, original_filter: "test_query" }, kind: TaskDeletion { query: "test_query", tasks: RoaringBitmap<[0, 1]> }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,1,2,3,]
--- a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/after_the_second_task_deletion.snap
@@ -41,18 +41,6 @@ source: index-scheduler/src/lib.rs
      "taskDeletion": {
        "query": "[query]",
        "tasks": [
-          1,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
          58,
          48,
          0,
--- a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/everything_has_been_processed.snap
@@ -21,18 +21,6 @@ source: index-scheduler/src/lib.rs
      "taskDeletion": {
        "query": "[query]",
        "tasks": [
-          1,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
          58,
          48,
          0,
--- a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_enqueued.snap
@@ -106,18 +106,6 @@ source: index-scheduler/src/lib.rs
      "taskDeletion": {
        "query": "[query]",
        "tasks": [
-          1,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
          58,
          48,
          0,
--- a/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_auto_deletion_of_tasks/task_deletion_have_been_processed.snap
@@ -61,18 +61,6 @@ source: index-scheduler/src/lib.rs
      "taskDeletion": {
        "query": "[query]",
        "tasks": [
-          1,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
-          0,
          58,
          48,
          0,
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@@ -5,16 +5,15 @@ use std::ops::Bound;

 use meilisearch_types::heed::types::{DecodeIgnore, OwnedType};
 use meilisearch_types::heed::{Database, RoTxn, RwTxn};
-use meilisearch_types::milli::heed_codec::CboRoaringTreemapCodec;
-use meilisearch_types::milli::BEU64;
+use meilisearch_types::milli::{CboRoaringBitmapCodec, BEU32};
 use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status};
-use roaring::{MultiOps, RoaringTreemap};
+use roaring::{MultiOps, RoaringBitmap};
 use time::OffsetDateTime;

 use crate::{Error, IndexScheduler, Result, Task, TaskId, BEI128};

 impl IndexScheduler {
-    pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringTreemap> {
+    pub(crate) fn all_task_ids(&self, rtxn: &RoTxn) -> Result<RoaringBitmap> {
        enum_iterator::all().map(|s| self.get_status(rtxn, s)).union()
    }

@@ -27,7 +26,7 @@ impl IndexScheduler {
    }

    pub(crate) fn get_task(&self, rtxn: &RoTxn, task_id: TaskId) -> Result<Option<Task>> {
-        Ok(self.all_tasks.get(rtxn, &BEU64::new(task_id))?)
+        Ok(self.all_tasks.get(rtxn, &BEU32::new(task_id))?)
    }

    /// Convert an iterator to a `Vec` of tasks. The tasks MUST exist or a
@@ -89,12 +88,12 @@ impl IndexScheduler {
            }
        }

-        self.all_tasks.put(wtxn, &BEU64::new(task.uid), task)?;
+        self.all_tasks.put(wtxn, &BEU32::new(task.uid), task)?;
        Ok(())
    }

    /// Returns the whole set of tasks that belongs to this index.
-    pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringTreemap> {
+    pub(crate) fn index_tasks(&self, rtxn: &RoTxn, index: &str) -> Result<RoaringBitmap> {
        Ok(self.index_tasks.get(rtxn, index)?.unwrap_or_default())
    }

@@ -102,7 +101,7 @@ impl IndexScheduler {
        &self,
        wtxn: &mut RwTxn,
        index: &str,
-        f: impl Fn(&mut RoaringTreemap),
+        f: impl Fn(&mut RoaringBitmap),
    ) -> Result<()> {
        let mut tasks = self.index_tasks(wtxn, index)?;
        f(&mut tasks);
@@ -115,7 +114,7 @@ impl IndexScheduler {
        Ok(())
    }

-    pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringTreemap> {
+    pub(crate) fn get_status(&self, rtxn: &RoTxn, status: Status) -> Result<RoaringBitmap> {
        Ok(self.status.get(rtxn, &status)?.unwrap_or_default())
    }

@@ -123,7 +122,7 @@ impl IndexScheduler {
        &self,
        wtxn: &mut RwTxn,
        status: Status,
-        bitmap: &RoaringTreemap,
+        bitmap: &RoaringBitmap,
    ) -> Result<()> {
        Ok(self.status.put(wtxn, &status, bitmap)?)
    }
@@ -132,7 +131,7 @@ impl IndexScheduler {
        &self,
        wtxn: &mut RwTxn,
        status: Status,
-        f: impl Fn(&mut RoaringTreemap),
+        f: impl Fn(&mut RoaringBitmap),
    ) -> Result<()> {
        let mut tasks = self.get_status(wtxn, status)?;
        f(&mut tasks);
@@ -141,7 +140,7 @@ impl IndexScheduler {
        Ok(())
    }

-    pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringTreemap> {
+    pub(crate) fn get_kind(&self, rtxn: &RoTxn, kind: Kind) -> Result<RoaringBitmap> {
        Ok(self.kind.get(rtxn, &kind)?.unwrap_or_default())
    }

@@ -149,7 +148,7 @@ impl IndexScheduler {
        &self,
        wtxn: &mut RwTxn,
        kind: Kind,
-        bitmap: &RoaringTreemap,
+        bitmap: &RoaringBitmap,
    ) -> Result<()> {
        Ok(self.kind.put(wtxn, &kind, bitmap)?)
    }
@@ -158,7 +157,7 @@ impl IndexScheduler {
        &self,
        wtxn: &mut RwTxn,
        kind: Kind,
-        f: impl Fn(&mut RoaringTreemap),
+        f: impl Fn(&mut RoaringBitmap),
    ) -> Result<()> {
        let mut tasks = self.get_kind(wtxn, kind)?;
        f(&mut tasks);
@@ -170,20 +169,20 @@ impl IndexScheduler {

 pub(crate) fn insert_task_datetime(
    wtxn: &mut RwTxn,
-    database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
+    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
    time: OffsetDateTime,
    task_id: TaskId,
 ) -> Result<()> {
    let timestamp = BEI128::new(time.unix_timestamp_nanos());
    let mut task_ids = database.get(wtxn, &timestamp)?.unwrap_or_default();
    task_ids.insert(task_id);
-    database.put(wtxn, &timestamp, &RoaringTreemap::from_iter(task_ids))?;
+    database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(task_ids))?;
    Ok(())
 }

 pub(crate) fn remove_task_datetime(
    wtxn: &mut RwTxn,
-    database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
+    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
    time: OffsetDateTime,
    task_id: TaskId,
 ) -> Result<()> {
@@ -193,7 +192,7 @@ pub(crate) fn remove_task_datetime(
        if existing.is_empty() {
            database.delete(wtxn, &timestamp)?;
        } else {
-            database.put(wtxn, &timestamp, &RoaringTreemap::from_iter(existing))?;
+            database.put(wtxn, &timestamp, &RoaringBitmap::from_iter(existing))?;
        }
    }

@@ -202,8 +201,8 @@ pub(crate) fn remove_task_datetime(

 pub(crate) fn keep_tasks_within_datetimes(
    rtxn: &RoTxn,
-    tasks: &mut RoaringTreemap,
-    database: Database<OwnedType<BEI128>, CboRoaringTreemapCodec>,
+    tasks: &mut RoaringBitmap,
+    database: Database<OwnedType<BEI128>, CboRoaringBitmapCodec>,
    after: Option<OffsetDateTime>,
    before: Option<OffsetDateTime>,
 ) -> Result<()> {
@@ -213,7 +212,7 @@ pub(crate) fn keep_tasks_within_datetimes(
        (Some(after), None) => (Bound::Excluded(*after), Bound::Unbounded),
        (Some(after), Some(before)) => (Bound::Excluded(*after), Bound::Excluded(*before)),
    };
-    let mut collected_task_ids = RoaringTreemap::new();
+    let mut collected_task_ids = RoaringBitmap::new();
    let start = map_bound(start, |b| BEI128::new(b.unix_timestamp_nanos()));
    let end = map_bound(end, |b| BEI128::new(b.unix_timestamp_nanos()));
    let iter = database.range(rtxn, &(start, end))?;
--- a/meilisearch-types/src/deserr/query_params.rs
+++ b/meilisearch-types/src/deserr/query_params.rs
@@ -104,7 +104,6 @@ macro_rules! impl_from_query_param_wrap_original_value_in_error {
 }
 impl_from_query_param_wrap_original_value_in_error!(usize, DeserrParseIntError);
 impl_from_query_param_wrap_original_value_in_error!(u32, DeserrParseIntError);
-impl_from_query_param_wrap_original_value_in_error!(u64, DeserrParseIntError);
 impl_from_query_param_wrap_original_value_in_error!(bool, DeserrParseBoolError);

 impl FromQueryParameter for String {
--- a/meilisearch-types/src/tasks.rs
+++ b/meilisearch-types/src/tasks.rs
@@ -5,7 +5,7 @@ use std::str::FromStr;

 use enum_iterator::Sequence;
 use milli::update::IndexDocumentsMethod;
-use roaring::RoaringTreemap;
+use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize, Serializer};
 use time::{Duration, OffsetDateTime};
 use uuid::Uuid;
@@ -15,7 +15,7 @@ use crate::keys::Key;
 use crate::settings::{Settings, Unchecked};
 use crate::InstanceUid;

-pub type TaskId = u64;
+pub type TaskId = u32;

 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 #[serde(rename_all = "camelCase")]
@@ -127,11 +127,11 @@ pub enum KindWithContent {
    },
    TaskCancelation {
        query: String,
-        tasks: RoaringTreemap,
+        tasks: RoaringBitmap,
    },
    TaskDeletion {
        query: String,
-        tasks: RoaringTreemap,
+        tasks: RoaringBitmap,
    },
    DumpCreation {
        keys: Vec<Key>,
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -133,7 +133,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
 zip = { version = "0.6.4", optional = true }

 [features]
-default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
+default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard", "profile-with-puffin"]
 analytics = ["segment"]
 profile-with-puffin = ["dep:puffin_http"]
 mini-dashboard = [
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -285,6 +285,7 @@ impl From<Opt> for Infos {
            db_path,
            experimental_enable_metrics,
            experimental_reduce_indexing_memory_usage,
+            experimental_limit_batched_tasks: _,
            http_addr,
            master_key: _,
            env,
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@@ -203,7 +203,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<(Arc<IndexScheduler>, Arc<
            .name(String::from("register-snapshot-tasks"))
            .spawn(move || loop {
                thread::sleep(snapshot_delay);
-                if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation, None) {
+                if let Err(e) = index_scheduler.register(KindWithContent::SnapshotCreation) {
                    error!("Error while registering snapshot: {}", e);
                }
            })
@@ -236,6 +236,7 @@ fn open_or_create_database_unchecked(
            enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
            indexer_config: (&opt.indexer_options).try_into()?,
            autobatching_enabled: true,
+            maximum_number_of_batched_tasks: opt.experimental_limit_batched_tasks,
            max_number_of_tasks: 1_000_000,
            index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
            index_count: DEFAULT_INDEX_COUNT,
--- a/meilisearch/src/option.rs
+++ b/meilisearch/src/option.rs
@@ -51,6 +51,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
 const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
 const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
    "MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE";
+const MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS: &str = "MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS";

 const DEFAULT_CONFIG_FILE_PATH: &str = "./config.toml";
 const DEFAULT_DB_PATH: &str = "./data.ms";
@@ -301,6 +302,11 @@ pub struct Opt {
    #[serde(default)]
    pub experimental_reduce_indexing_memory_usage: bool,

+    /// Experimental limit to the number of tasks per batch
+    #[clap(long, env = MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS, default_value_t = default_limit_batched_tasks())]
+    #[serde(default = "default_limit_batched_tasks")]
+    pub experimental_limit_batched_tasks: usize,
+
    #[serde(flatten)]
    #[clap(flatten)]
    pub indexer_options: IndexerOpts,
@@ -393,7 +399,8 @@ impl Opt {
            #[cfg(all(not(debug_assertions), feature = "analytics"))]
            no_analytics,
            experimental_enable_metrics: enable_metrics_route,
-            experimental_reduce_indexing_memory_usage: reduce_indexing_memory_usage,
+            experimental_reduce_indexing_memory_usage,
+            experimental_limit_batched_tasks,
        } = self;
        export_to_env_if_not_present(MEILI_DB_PATH, db_path);
        export_to_env_if_not_present(MEILI_HTTP_ADDR, http_addr);
@@ -437,7 +444,11 @@ impl Opt {
        );
        export_to_env_if_not_present(
            MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE,
-            reduce_indexing_memory_usage.to_string(),
+            experimental_reduce_indexing_memory_usage.to_string(),
+        );
+        export_to_env_if_not_present(
+            MEILI_EXPERIMENTAL_LIMIT_BATCHED_TASKS,
+            experimental_limit_batched_tasks.to_string(),
        );
        indexer_options.export_to_env();
    }
@@ -739,6 +750,10 @@ fn default_dump_dir() -> PathBuf {
    PathBuf::from(DEFAULT_DUMP_DIR)
 }

+fn default_limit_batched_tasks() -> usize {
+    usize::MAX
+}
+
 /// Indicates if a snapshot was scheduled, and if yes with which interval.
 #[derive(Debug, Default, Copy, Clone, Deserialize, Serialize)]
 pub enum ScheduleSnapshot {
--- a/meilisearch/src/routes/dump.rs
+++ b/meilisearch/src/routes/dump.rs
@@ -11,7 +11,7 @@ use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
-use crate::routes::{get_task_id, SummarizedTaskView};
+use crate::routes::SummarizedTaskView;

 pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -29,9 +29,8 @@ pub async fn create_dump(
        keys: auth_controller.list_keys()?,
        instance_uid: analytics.instance_uid().cloned(),
    };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@@ -7,7 +7,7 @@ use bstr::ByteSlice;
 use deserr::actix_web::{AwebJson, AwebQueryParameter};
 use deserr::Deserr;
 use futures::StreamExt;
-use index_scheduler::{IndexScheduler, TaskId};
+use index_scheduler::IndexScheduler;
 use log::debug;
 use meilisearch_types::deserr::query_params::Param;
 use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
@@ -36,7 +36,7 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::payload::Payload;
 use crate::extractors::sequential_extractor::SeqHandler;
-use crate::routes::{get_task_id, PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
+use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
 use crate::search::parse_filter;

 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
@@ -129,9 +129,8 @@ pub async fn delete_document(
        index_uid: index_uid.to_string(),
        documents_ids: vec![document_id],
    };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
 }
@@ -278,7 +277,6 @@ pub async fn replace_documents(
    analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
-    let uid = get_task_id(&req)?;
    let task = document_addition(
        extract_mime_type(&req)?,
        index_scheduler,
@@ -287,7 +285,6 @@ pub async fn replace_documents(
        params.csv_delimiter,
        body,
        IndexDocumentsMethod::ReplaceDocuments,
-        uid,
        allow_index_creation,
    )
    .await?;
@@ -311,7 +308,6 @@ pub async fn update_documents(
    analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);

    let allow_index_creation = index_scheduler.filters().allow_index_creation(&index_uid);
-    let uid = get_task_id(&req)?;
    let task = document_addition(
        extract_mime_type(&req)?,
        index_scheduler,
@@ -320,7 +316,6 @@ pub async fn update_documents(
        params.csv_delimiter,
        body,
        IndexDocumentsMethod::UpdateDocuments,
-        uid,
        allow_index_creation,
    )
    .await?;
@@ -337,7 +332,6 @@ async fn document_addition(
    csv_delimiter: Option<u8>,
    mut body: Payload,
    method: IndexDocumentsMethod,
-    task_id: Option<TaskId>,
    allow_index_creation: bool,
 ) -> Result<SummarizedTaskView, MeilisearchHttpError> {
    let format = match (
@@ -451,7 +445,7 @@ async fn document_addition(
    };

    let scheduler = index_scheduler.clone();
-    let task = match tokio::task::spawn_blocking(move || scheduler.register(task, task_id)).await? {
+    let task = match tokio::task::spawn_blocking(move || scheduler.register(task)).await? {
        Ok(task) => task,
        Err(e) => {
            index_scheduler.delete_update_file(uuid)?;
@@ -482,9 +476,8 @@ pub async fn delete_documents_batch(

    let task =
        KindWithContent::DocumentDeletion { index_uid: index_uid.to_string(), documents_ids: ids };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
@@ -519,9 +512,8 @@ pub async fn delete_documents_by_filter(
    .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
    let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };

-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
@@ -537,9 +529,8 @@ pub async fn clear_all_documents(
    analytics.delete_documents(DocumentDeletionKind::ClearAll, &req);

    let task = KindWithContent::DocumentClear { index_uid: index_uid.to_string() };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
--- a/meilisearch/src/routes/indexes/mod.rs
+++ b/meilisearch/src/routes/indexes/mod.rs
@@ -17,7 +17,7 @@ use serde::Serialize;
 use serde_json::json;
 use time::OffsetDateTime;

-use super::{get_task_id, Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
+use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
 use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::{AuthenticationError, GuardedData};
@@ -135,9 +135,8 @@ pub async fn create_index(
        );

        let task = KindWithContent::IndexCreation { index_uid: uid.to_string(), primary_key };
-        let uid = get_task_id(&req)?;
        let task: SummarizedTaskView =
-            tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+            tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

        Ok(HttpResponse::Accepted().json(task))
    } else {
@@ -204,9 +203,8 @@ pub async fn update_index(
        primary_key: body.primary_key,
    };

-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
@@ -215,13 +213,11 @@ pub async fn update_index(
 pub async fn delete_index(
    index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_DELETE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
-    req: HttpRequest,
 ) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;
    let task = KindWithContent::IndexDeletion { index_uid: index_uid.into_inner() };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    Ok(HttpResponse::Accepted().json(task))
 }
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -14,7 +14,7 @@ use serde_json::json;
 use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
-use crate::routes::{get_task_id, SummarizedTaskView};
+use crate::routes::SummarizedTaskView;

 #[macro_export]
 macro_rules! make_setting_route {
@@ -33,7 +33,7 @@ macro_rules! make_setting_route {
            use $crate::extractors::authentication::policies::*;
            use $crate::extractors::authentication::GuardedData;
            use $crate::extractors::sequential_extractor::SeqHandler;
-            use $crate::routes::{get_task_id, SummarizedTaskView};
+            use $crate::routes::SummarizedTaskView;

            pub async fn delete(
                index_scheduler: GuardedData<
@@ -41,7 +41,6 @@ macro_rules! make_setting_route {
                    Data<IndexScheduler>,
                >,
                index_uid: web::Path<String>,
-                req: HttpRequest,
            ) -> Result<HttpResponse, ResponseError> {
                let index_uid = IndexUid::try_from(index_uid.into_inner())?;

@@ -56,9 +55,8 @@ macro_rules! make_setting_route {
                    is_deletion: true,
                    allow_index_creation,
                };
-                let uid = get_task_id(&req)?;
                let task: SummarizedTaskView =
-                    tokio::task::spawn_blocking(move || index_scheduler.register(task, uid))
+                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
                        .await??
                        .into();

@@ -99,9 +97,8 @@ macro_rules! make_setting_route {
                    is_deletion: false,
                    allow_index_creation,
                };
-                let uid = get_task_id(&req)?;
                let task: SummarizedTaskView =
-                    tokio::task::spawn_blocking(move || index_scheduler.register(task, uid))
+                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
                        .await??
                        .into();

@@ -667,9 +664,8 @@ pub async fn update_all(
        is_deletion: false,
        allow_index_creation,
    };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
@@ -691,7 +687,6 @@ pub async fn get_all(
 pub async fn delete_all(
    index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
    index_uid: web::Path<String>,
-    req: HttpRequest,
 ) -> Result<HttpResponse, ResponseError> {
    let index_uid = IndexUid::try_from(index_uid.into_inner())?;

@@ -705,9 +700,8 @@ pub async fn delete_all(
        is_deletion: true,
        allow_index_creation,
    };
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();

    debug!("returns: {:?}", task);
    Ok(HttpResponse::Accepted().json(task))
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -5,7 +5,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use log::debug;
 use meilisearch_auth::AuthController;
-use meilisearch_types::error::{Code, ResponseError};
+use meilisearch_types::error::ResponseError;
 use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
 use serde::{Deserialize, Serialize};
@@ -41,34 +41,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
        .service(web::scope("/experimental-features").configure(features::configure));
 }

-pub fn get_task_id(req: &HttpRequest) -> Result<Option<TaskId>, ResponseError> {
-    let task_id = req
-        .headers()
-        .get("TaskId")
-        .map(|header| {
-            header.to_str().map_err(|e| {
-                ResponseError::from_msg(
-                    format!("TaskId is not a valid utf-8 string: {e}"),
-                    Code::BadRequest,
-                )
-            })
-        })
-        .transpose()?
-        .map(|s| {
-            s.parse::<TaskId>().map_err(|e| {
-                ResponseError::from_msg(
-                    format!(
-                        "Could not parse the TaskId as a {}: {e}",
-                        std::any::type_name::<TaskId>(),
-                    ),
-                    Code::BadRequest,
-                )
-            })
-        })
-        .transpose()?;
-    Ok(task_id)
-}
-
 #[derive(Debug, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct SummarizedTaskView {
--- a/meilisearch/src/routes/swap_indexes.rs
+++ b/meilisearch/src/routes/swap_indexes.rs
@@ -10,7 +10,7 @@ use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::tasks::{IndexSwap, KindWithContent};
 use serde_json::json;

-use super::{get_task_id, SummarizedTaskView};
+use super::SummarizedTaskView;
 use crate::analytics::Analytics;
 use crate::error::MeilisearchHttpError;
 use crate::extractors::authentication::policies::*;
@@ -60,9 +60,7 @@ pub async fn swap_indexes(
    }

    let task = KindWithContent::IndexSwap { swaps };
-
-    let uid = get_task_id(&req)?;
    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task, uid)).await??.into();
+        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
    Ok(HttpResponse::Accepted().json(task))
 }
--- a/meilisearch/src/routes/tasks.rs
+++ b/meilisearch/src/routes/tasks.rs
@@ -20,13 +20,13 @@ use time::macros::format_description;
 use time::{Date, Duration, OffsetDateTime, Time};
 use tokio::task;

-use super::{get_task_id, SummarizedTaskView};
+use super::SummarizedTaskView;
 use crate::analytics::Analytics;
 use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;

-const DEFAULT_LIMIT: u64 = 20;
+const DEFAULT_LIMIT: u32 = 20;

 pub fn configure(cfg: &mut web::ServiceConfig) {
    cfg.service(
@@ -175,14 +175,14 @@ impl From<Details> for DetailsView {
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
 pub struct TasksFilterQuery {
    #[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
-    pub limit: Param<TaskId>,
+    pub limit: Param<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskFrom>)]
    pub from: Option<Param<TaskId>>,

    #[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
-    pub uids: OptionStarOrList<TaskId>,
+    pub uids: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
-    pub canceled_by: OptionStarOrList<TaskId>,
+    pub canceled_by: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
    pub types: OptionStarOrList<Kind>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
@@ -249,9 +249,9 @@ impl TaskDeletionOrCancelationQuery {
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
 pub struct TaskDeletionOrCancelationQuery {
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
-    pub uids: OptionStarOrList<TaskId>,
+    pub uids: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskCanceledBy>)]
-    pub canceled_by: OptionStarOrList<TaskId>,
+    pub canceled_by: OptionStarOrList<u32>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskTypes>)]
    pub types: OptionStarOrList<Kind>,
    #[deserr(default, error = DeserrQueryParamError<InvalidTaskStatuses>)]
@@ -333,9 +333,7 @@ async fn cancel_tasks(
    let task_cancelation =
        KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };

-    let uid = get_task_id(&req)?;
-    let task =
-        task::spawn_blocking(move || index_scheduler.register(task_cancelation, uid)).await??;
+    let task = task::spawn_blocking(move || index_scheduler.register(task_cancelation)).await??;
    let task: SummarizedTaskView = task.into();

    Ok(HttpResponse::Ok().json(task))
@@ -380,8 +378,7 @@ async fn delete_tasks(
    let task_deletion =
        KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };

-    let uid = get_task_id(&req)?;
-    let task = task::spawn_blocking(move || index_scheduler.register(task_deletion, uid)).await??;
+    let task = task::spawn_blocking(move || index_scheduler.register(task_deletion)).await??;
    let task: SummarizedTaskView = task.into();

    Ok(HttpResponse::Ok().json(task))
@@ -391,9 +388,9 @@ async fn delete_tasks(
 pub struct AllTasks {
    results: Vec<TaskView>,
    total: u64,
-    limit: TaskId,
-    from: Option<TaskId>,
-    next: Option<TaskId>,
+    limit: u32,
+    from: Option<u32>,
+    next: Option<u32>,
 }

 async fn get_tasks(
--- a/meilisearch/tests/documents/delete_documents.rs
+++ b/meilisearch/tests/documents/delete_documents.rs
@@ -154,6 +154,19 @@ async fn delete_document_by_filter() {
        )
        .await;
    index.wait_task(1).await;
+
+    let (stats, _) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 4,
+      "isIndexing": false,
+      "fieldDistribution": {
+        "color": 3,
+        "id": 4
+      }
+    }
+    "###);
+
    let (response, code) =
        index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
    snapshot!(code, @"202 Accepted");
@@ -188,6 +201,18 @@ async fn delete_document_by_filter() {
    }
    "###);

+    let (stats, _) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "fieldDistribution": {
+        "color": 1,
+        "id": 2
+      }
+    }
+    "###);
+
    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(documents), @r###"
@@ -241,6 +266,18 @@ async fn delete_document_by_filter() {
    }
    "###);

+    let (stats, _) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 1,
+      "isIndexing": false,
+      "fieldDistribution": {
+        "color": 1,
+        "id": 1
+      }
+    }
+    "###);
+
    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(documents), @r###"
--- a/meilisearch/tests/index/create_index.rs
+++ b/meilisearch/tests/index/create_index.rs
@@ -199,74 +199,3 @@ async fn error_create_with_invalid_index_uid() {
    }
    "###);
 }
-
-#[actix_rt::test]
-async fn send_task_id() {
-    let server = Server::new().await;
-    let app = server.init_web_app().await;
-    let index = server.index("catto");
-    let (response, code) = index.create(None).await;
-    snapshot!(code, @"202 Accepted");
-    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
-    {
-      "taskUid": 0,
-      "indexUid": "catto",
-      "status": "enqueued",
-      "type": "indexCreation",
-      "enqueuedAt": "[date]"
-    }
-    "###);
-
-    let body = serde_json::to_string(&json!({
-        "uid": "doggo",
-        "primaryKey": None::<&str>,
-    }))
-    .unwrap();
-    let req = test::TestRequest::post()
-        .uri("/indexes")
-        .insert_header(("TaskId", "25"))
-        .insert_header(ContentType::json())
-        .set_payload(body)
-        .to_request();
-
-    let res = test::call_service(&app, req).await;
-    snapshot!(res.status(), @"202 Accepted");
-
-    let bytes = test::read_body(res).await;
-    let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
-    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
-    {
-      "taskUid": 25,
-      "indexUid": "doggo",
-      "status": "enqueued",
-      "type": "indexCreation",
-      "enqueuedAt": "[date]"
-    }
-    "###);
-
-    let body = serde_json::to_string(&json!({
-        "uid": "girafo",
-        "primaryKey": None::<&str>,
-    }))
-    .unwrap();
-    let req = test::TestRequest::post()
-        .uri("/indexes")
-        .insert_header(("TaskId", "12"))
-        .insert_header(ContentType::json())
-        .set_payload(body)
-        .to_request();
-
-    let res = test::call_service(&app, req).await;
-    snapshot!(res.status(), @"400 Bad Request");
-
-    let bytes = test::read_body(res).await;
-    let response = serde_json::from_slice::<Value>(&bytes).expect("Expecting valid json");
-    snapshot!(json_string!(response), @r###"
-    {
-      "message": "Received bad task id: 12 should be >= to 26.",
-      "code": "bad_request",
-      "type": "invalid_request",
-      "link": "https://docs.meilisearch.com/errors#bad_request"
-    }
-    "###);
-}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -1104,3 +1104,59 @@ async fn camelcased_words() {
        })
        .await;
 }
+
+#[actix_rt::test]
+async fn simple_search_with_strange_synonyms() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
+    let r = index.wait_task(0).await;
+    meili_snap::snapshot!(r["status"], @r###""succeeded""###);
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await;
+
+    index
+        .search(json!({"q": "How to train"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "How & train"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index
+        .search(json!({"q": "to"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -17,7 +17,8 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.3", default-features = false }
+# charabia = { version = "0.8.3", default-features = false }
+charabia = { git = "https://github.com/meilisearch/charabia", branch = "main", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
 deserr = { version = "0.6.0", features = ["actix-web"]}
--- a/milli/src/heed_codec/mod.rs
+++ b/milli/src/heed_codec/mod.rs
@@ -20,10 +20,7 @@ pub use self::beu32_str_codec::BEU32StrCodec;
 pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
 pub use self::fst_set_codec::FstSetCodec;
 pub use self::obkv_codec::ObkvCodec;
-pub use self::roaring_bitmap::{
-    BoRoaringBitmapCodec, CboRoaringBitmapCodec, CboRoaringTreemapCodec, RoaringBitmapCodec,
-    RoaringTreemapCodec,
-};
+pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
 pub use self::roaring_bitmap_length::{
    BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
 };
--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_treemap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_treemap_codec.rs
@@ -1,196 +0,0 @@
-use std::borrow::Cow;
-use std::io;
-use std::mem::size_of;
-
-use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
-use roaring::RoaringTreemap;
-
-use crate::heed_codec::BytesDecodeOwned;
-
-/// This is the limit where using a byteorder became less size efficient
-/// than using a direct roaring encoding, it is also the point where we are able
-/// to determine the encoding used only by using the array of bytes length.
-pub const THRESHOLD: usize = 4;
-
-/// A conditionnal codec that either use the RoaringBitmap
-/// or a lighter ByteOrder en/decoding method.
-pub struct CboRoaringTreemapCodec;
-
-impl CboRoaringTreemapCodec {
-    pub fn serialized_size(roaring: &RoaringTreemap) -> usize {
-        if roaring.len() <= THRESHOLD as u64 {
-            roaring.len() as usize * size_of::<u64>()
-        } else {
-            roaring.serialized_size()
-        }
-    }
-
-    pub fn serialize_into(roaring: &RoaringTreemap, vec: &mut Vec<u8>) {
-        if roaring.len() <= THRESHOLD as u64 {
-            // If the number of items (u32s) to encode is less than or equal to the threshold
-            // it means that it would weigh the same or less than the RoaringBitmap
-            // header, so we directly encode them using ByteOrder instead.
-            for integer in roaring {
-                vec.write_u64::<NativeEndian>(integer).unwrap();
-            }
-        } else {
-            // Otherwise, we use the classic RoaringBitmapCodec that writes a header.
-            roaring.serialize_into(vec).unwrap();
-        }
-    }
-
-    pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringTreemap> {
-        if bytes.len() <= THRESHOLD * size_of::<u64>() {
-            // If there is threshold or less than threshold integers that can fit into this array
-            // of bytes it means that we used the ByteOrder codec serializer.
-            let mut bitmap = RoaringTreemap::new();
-            while let Ok(integer) = bytes.read_u64::<NativeEndian>() {
-                bitmap.insert(integer);
-            }
-            Ok(bitmap)
-        } else {
-            // Otherwise, it means we used the classic RoaringBitmapCodec and
-            // that the header takes threshold integers.
-            RoaringTreemap::deserialize_unchecked_from(bytes)
-        }
-    }
-
-    /// Merge serialized CboRoaringBitmaps in a buffer.
-    ///
-    /// if the merged values length is under the threshold, values are directly
-    /// serialized in the buffer else a RoaringBitmap is created from the
-    /// values and is serialized in the buffer.
-    pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
-        let mut roaring = RoaringTreemap::new();
-        let mut vec = Vec::new();
-
-        for bytes in slices {
-            if bytes.len() <= THRESHOLD * size_of::<u64>() {
-                let mut reader = bytes.as_ref();
-                while let Ok(integer) = reader.read_u64::<NativeEndian>() {
-                    vec.push(integer);
-                }
-            } else {
-                roaring |= RoaringTreemap::deserialize_unchecked_from(bytes.as_ref())?;
-            }
-        }
-
-        if roaring.is_empty() {
-            vec.sort_unstable();
-            vec.dedup();
-
-            if vec.len() <= THRESHOLD {
-                for integer in vec {
-                    buffer.extend_from_slice(&integer.to_ne_bytes());
-                }
-            } else {
-                // We can unwrap safely because the vector is sorted upper.
-                let roaring = RoaringTreemap::from_sorted_iter(vec.into_iter()).unwrap();
-                roaring.serialize_into(buffer)?;
-            }
-        } else {
-            roaring.extend(vec);
-            roaring.serialize_into(buffer)?;
-        }
-
-        Ok(())
-    }
-}
-
-impl heed::BytesDecode<'_> for CboRoaringTreemapCodec {
-    type DItem = RoaringTreemap;
-
-    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
-        Self::deserialize_from(bytes).ok()
-    }
-}
-
-impl BytesDecodeOwned for CboRoaringTreemapCodec {
-    type DItem = RoaringTreemap;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        Self::deserialize_from(bytes).ok()
-    }
-}
-
-impl heed::BytesEncode<'_> for CboRoaringTreemapCodec {
-    type EItem = RoaringTreemap;
-
-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        let mut vec = Vec::with_capacity(Self::serialized_size(item));
-        Self::serialize_into(item, &mut vec);
-        Some(Cow::Owned(vec))
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::iter::FromIterator;
-
-    use heed::{BytesDecode, BytesEncode};
-
-    use super::*;
-
-    #[test]
-    fn verify_encoding_decoding() {
-        let input = RoaringTreemap::from_iter(0..THRESHOLD as u64);
-        let bytes = CboRoaringTreemapCodec::bytes_encode(&input).unwrap();
-        let output = CboRoaringTreemapCodec::bytes_decode(&bytes).unwrap();
-        assert_eq!(input, output);
-    }
-
-    #[test]
-    fn verify_threshold() {
-        let input = RoaringTreemap::from_iter(0..THRESHOLD as u64);
-
-        // use roaring treemap
-        let mut bytes = Vec::new();
-        input.serialize_into(&mut bytes).unwrap();
-        let roaring_size = bytes.len();
-
-        // use byteorder directly
-        let mut bytes = Vec::new();
-        for integer in input {
-            bytes.write_u64::<NativeEndian>(integer).unwrap();
-        }
-        let bo_size = bytes.len();
-
-        assert!(roaring_size > bo_size, "roaring size: {}, bo size {}", roaring_size, bo_size);
-    }
-
-    #[test]
-    fn merge_cbo_roaring_bitmaps() {
-        let mut buffer = Vec::new();
-
-        let small_data = vec![
-            RoaringTreemap::from_sorted_iter(1..4).unwrap(),
-            RoaringTreemap::from_sorted_iter(2..5).unwrap(),
-            RoaringTreemap::from_sorted_iter(4..6).unwrap(),
-            RoaringTreemap::from_sorted_iter(1..3).unwrap(),
-        ];
-
-        let small_data: Vec<_> =
-            small_data.iter().map(|b| CboRoaringTreemapCodec::bytes_encode(b).unwrap()).collect();
-        CboRoaringTreemapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
-        let bitmap = CboRoaringTreemapCodec::deserialize_from(&buffer).unwrap();
-        let expected = RoaringTreemap::from_sorted_iter(1..6).unwrap();
-        assert_eq!(bitmap, expected);
-
-        let medium_data = vec![
-            RoaringTreemap::from_sorted_iter(1..4).unwrap(),
-            RoaringTreemap::from_sorted_iter(2..5).unwrap(),
-            RoaringTreemap::from_sorted_iter(4..8).unwrap(),
-            RoaringTreemap::from_sorted_iter(0..3).unwrap(),
-            RoaringTreemap::from_sorted_iter(7..23).unwrap(),
-        ];
-
-        let medium_data: Vec<_> =
-            medium_data.iter().map(|b| CboRoaringTreemapCodec::bytes_encode(b).unwrap()).collect();
-        buffer.clear();
-        CboRoaringTreemapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();
-
-        let bitmap = CboRoaringTreemapCodec::deserialize_from(&buffer).unwrap();
-        let expected = RoaringTreemap::from_sorted_iter(0..23).unwrap();
-        assert_eq!(bitmap, expected);
-    }
-}
--- a/milli/src/heed_codec/roaring_bitmap/mod.rs
+++ b/milli/src/heed_codec/roaring_bitmap/mod.rs
@@ -1,11 +1,7 @@
 mod bo_roaring_bitmap_codec;
 pub mod cbo_roaring_bitmap_codec;
-pub mod cbo_roaring_treemap_codec;
 mod roaring_bitmap_codec;
-mod roaring_treemap_codec;

 pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
 pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
-pub use self::cbo_roaring_treemap_codec::CboRoaringTreemapCodec;
 pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
-pub use self::roaring_treemap_codec::RoaringTreemapCodec;
--- a/milli/src/heed_codec/roaring_bitmap/roaring_treemap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/roaring_treemap_codec.rs
@@ -1,33 +0,0 @@
-use std::borrow::Cow;
-
-use roaring::RoaringTreemap;
-
-use crate::heed_codec::BytesDecodeOwned;
-
-pub struct RoaringTreemapCodec;
-
-impl heed::BytesDecode<'_> for RoaringTreemapCodec {
-    type DItem = RoaringTreemap;
-
-    fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringTreemap::deserialize_unchecked_from(bytes).ok()
-    }
-}
-
-impl BytesDecodeOwned for RoaringTreemapCodec {
-    type DItem = RoaringTreemap;
-
-    fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
-        RoaringTreemap::deserialize_from(bytes).ok()
-    }
-}
-
-impl heed::BytesEncode<'_> for RoaringTreemapCodec {
-    type EItem = RoaringTreemap;
-
-    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
-        let mut bytes = Vec::with_capacity(item.serialized_size());
-        item.serialize_into(&mut bytes).ok()?;
-        Some(Cow::Owned(bytes))
-    }
-}
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -418,19 +418,11 @@ impl<'t> Matcher<'t, '_> {
        } else {
            match &self.matches {
                Some((tokens, matches)) => {
-                    // If the text has to be cropped,
-                    // compute the best interval to crop around.
-                    let matches = match format_options.crop {
-                        Some(crop_size) if crop_size > 0 => {
-                            self.find_best_match_interval(matches, crop_size)
-                        }
-                        _ => matches,
-                    };
-
                    // If the text has to be cropped,
                    // crop around the best interval.
                    let (byte_start, byte_end) = match format_options.crop {
                        Some(crop_size) if crop_size > 0 => {
+                            let matches = self.find_best_match_interval(matches, crop_size);
                            self.crop_bounds(tokens, matches, crop_size)
                        }
                        _ => (0, self.text.len()),
@@ -450,6 +442,11 @@ impl<'t> Matcher<'t, '_> {
                        for m in matches {
                            let token = &tokens[m.token_position];

+                            // skip matches out of the crop window.
+                            if token.byte_start < byte_start || token.byte_end > byte_end {
+                                continue;
+                            }
+
                            if byte_index < token.byte_start {
                                formatted.push(&self.text[byte_index..token.byte_start]);
                            }
@@ -800,6 +797,37 @@ mod tests {
        );
    }

+    #[test]
+    fn format_highlight_crop_phrase_query() {
+        //! testing: https://github.com/meilisearch/meilisearch/issues/3975
+        let temp_index = TempIndex::new();
+        temp_index
+            .add_documents(documents!([
+                { "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
+            ]))
+            .unwrap();
+        let rtxn = temp_index.read_txn().unwrap();
+
+        let format_options = FormatOptions { highlight: true, crop: Some(10) };
+        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
+
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
+        let mut matcher = builder.build(text);
+        // should return 10 words with a marker at the start as well as the end, and the highlighted matches.
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…had the power to split <em>the</em> <em>world</em> between those who…"
+        );
+
+        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
+        let mut matcher = builder.build(text);
+        // should highlight "those" and the phrase "and those".
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…world between <em>those</em> who embraced progress <em>and</em> <em>those</em> who resisted…"
+        );
+    }
+
    #[test]
    fn smaller_crop_size() {
        //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
@@ -108,15 +108,17 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        self.delete_document(docid);
        Some(docid)
    }
-    pub fn execute(self) -> Result<DocumentDeletionResult> {
-        puffin::profile_function!();

+    pub fn execute(self) -> Result<DocumentDeletionResult> {
        let DetailedDocumentDeletionResult { deleted_documents, remaining_documents } =
            self.execute_inner()?;

        Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
    }
+
    pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
+        puffin::profile_function!();
+
        self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;

        // We retrieve the current documents ids that are in the database.
@@ -476,6 +478,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        C: for<'a> BytesDecode<'a, DItem = RoaringBitmap>
            + for<'a> BytesEncode<'a, EItem = RoaringBitmap>,
    {
+        puffin::profile_function!();
+
        while let Some(result) = iter.next() {
            let (bytes, mut docids) = result?;
            let previous_len = docids.len();
@@ -498,6 +502,8 @@ fn remove_from_word_prefix_docids(
    db: &Database<Str, RoaringBitmapCodec>,
    to_remove: &RoaringBitmap,
 ) -> Result<fst::Set<Vec<u8>>> {
+    puffin::profile_function!();
+
    let mut prefixes_to_delete = fst::SetBuilder::memory();

    // We iterate over the word prefix docids database and remove the deleted documents ids
@@ -528,6 +534,8 @@ fn remove_from_word_docids(
    words_to_keep: &mut BTreeSet<String>,
    words_to_remove: &mut BTreeSet<String>,
 ) -> Result<()> {
+    puffin::profile_function!();
+
    // We create an iterator to be able to get the content and delete the word docids.
    // It's faster to acquire a cursor to get and delete or put, as we avoid traversing
    // the LMDB B-Tree two times but only once.
@@ -559,6 +567,8 @@ fn remove_docids_from_field_id_docid_facet_value(
    field_id: FieldId,
    to_remove: &RoaringBitmap,
 ) -> heed::Result<HashSet<Vec<u8>>> {
+    puffin::profile_function!();
+
    let db = match facet_type {
        FacetType::String => {
            index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
@@ -594,6 +604,8 @@ fn remove_docids_from_facet_id_docids<'a, C>(
 where
    C: heed::BytesDecode<'a> + heed::BytesEncode<'a>,
 {
+    puffin::profile_function!();
+
    let mut iter = db.remap_key_type::<ByteSlice>().iter_mut(wtxn)?;
    while let Some(result) = iter.next() {
        let (bytes, mut docids) = result?;
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -226,9 +226,9 @@ fn process_tokens<'a>(
 ) -> impl Iterator<Item = (usize, Token<'a>)> {
    tokens
        .skip_while(|token| token.is_separator())
-        .scan((0, None), |(offset, prev_kind), token| {
+        .scan((0, None), |(offset, prev_kind), mut token| {
            match token.kind {
-                TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
+                TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
                    *offset += match *prev_kind {
                        Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
                        Some(_) => 1,
@@ -244,7 +244,7 @@ fn process_tokens<'a>(
                {
                    *prev_kind = Some(token.kind);
                }
-                _ => (),
+                _ => token.kind = TokenKind::Unknown,
            }
            Some((*offset, token))
        })
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -59,7 +59,13 @@ pub(crate) fn data_from_obkv_documents(
    original_obkv_chunks
        .par_bridge()
        .map(|original_documents_chunk| {
-            send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
+            send_original_documents_data(
+                original_documents_chunk,
+                indexer,
+                lmdb_writer_sx.clone(),
+                vectors_field_id,
+                primary_key_id,
+            )
        })
        .collect::<Result<()>>()?;

@@ -76,7 +82,6 @@ pub(crate) fn data_from_obkv_documents(
                    &faceted_fields,
                    primary_key_id,
                    geo_fields_ids,
-                    vectors_field_id,
                    &stop_words,
                    &allowed_separators,
                    &dictionary,
@@ -265,11 +270,33 @@ fn spawn_extraction_task<FE, FS, M>(
 /// - documents
 fn send_original_documents_data(
    original_documents_chunk: Result<grenad::Reader<File>>,
+    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
+    vectors_field_id: Option<FieldId>,
+    primary_key_id: FieldId,
 ) -> Result<()> {
    let original_documents_chunk =
        original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

+    if let Some(vectors_field_id) = vectors_field_id {
+        let documents_chunk_cloned = original_documents_chunk.clone();
+        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            let result = extract_vector_points(
+                documents_chunk_cloned,
+                indexer,
+                primary_key_id,
+                vectors_field_id,
+            );
+            let _ = match result {
+                Ok(vector_points) => {
+                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
+                }
+                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
+            };
+        });
+    }
+
    // TODO: create a custom internal error
    lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
    Ok(())
@@ -291,7 +318,6 @@ fn send_and_extract_flattened_documents_data(
    faceted_fields: &HashSet<FieldId>,
    primary_key_id: FieldId,
    geo_fields_ids: Option<(FieldId, FieldId)>,
-    vectors_field_id: Option<FieldId>,
    stop_words: &Option<fst::Set<&[u8]>>,
    allowed_separators: &Option<&[&str]>,
    dictionary: &Option<&[&str]>,
@@ -322,25 +348,6 @@ fn send_and_extract_flattened_documents_data(
        });
    }

-    if let Some(vectors_field_id) = vectors_field_id {
-        let documents_chunk_cloned = flattened_documents_chunk.clone();
-        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
-        rayon::spawn(move || {
-            let result = extract_vector_points(
-                documents_chunk_cloned,
-                indexer,
-                primary_key_id,
-                vectors_field_id,
-            );
-            let _ = match result {
-                Ok(vector_points) => {
-                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
-                }
-                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
-            };
-        });
-    }
-
    let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
        rayon::join(
            || {
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -54,6 +54,8 @@ pub fn sorter_into_reader(
    sorter: grenad::Sorter<MergeFn>,
    indexer: GrenadParameters,
 ) -> Result<grenad::Reader<File>> {
+    puffin::profile_function!();
+
    let mut writer = create_writer(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -2550,6 +2550,25 @@ mod tests {
        db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
    }

+    /// Index documents that each contain a different number of vectors.
+    /// Vectors must be of the same length.
+    #[test]
+    fn test_multiple_vectors() {
+        let index = TempIndex::new();
+
+        index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
+        index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
+        index
+            .add_documents(
+                documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
+            )
+            .unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+        let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
+        assert_eq!(res.documents_ids.len(), 3);
+    }
+
    #[test]
    fn reproduce_the_bug() {
        /*
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -573,7 +573,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                    tokenizer
                        .tokenize(text)
                        .filter_map(|token| {
-                            if token.is_word() {
+                            if token.is_word() && !token.lemma().is_empty() {
                                Some(token.lemma().to_string())
                            } else {
                                None
@@ -608,13 +608,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                for (word, synonyms) in user_synonyms {
                    // Normalize both the word and associated synonyms.
                    let normalized_word = normalize(&tokenizer, word);
-                    let normalized_synonyms =
-                        synonyms.iter().map(|synonym| normalize(&tokenizer, synonym));
+                    let normalized_synonyms: Vec<_> = synonyms
+                        .iter()
+                        .map(|synonym| normalize(&tokenizer, synonym))
+                        .filter(|synonym| !synonym.is_empty())
+                        .collect();

                    // Store the normalized synonyms under the normalized word,
                    // merging the possible duplicate words.
-                    let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
-                    entry.extend(normalized_synonyms);
+                    if !normalized_word.is_empty() && !normalized_synonyms.is_empty() {
+                        let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
+                        entry.extend(normalized_synonyms.into_iter());
+                    }
                }

                // Make sure that we don't have duplicate synonyms.
@@ -1422,6 +1427,43 @@ mod tests {
        assert!(result.documents_ids.is_empty());
    }

+    #[test]
+    fn thai_synonyms() {
+        let mut index = TempIndex::new();
+        index.index_documents_config.autogenerate_docids = true;
+
+        let mut wtxn = index.write_txn().unwrap();
+        // Send 2 documents; their ids are autogenerated.
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "name": "ยี่ปุ่น" },
+                    { "name": "ญี่ปุ่น" },
+                ]),
+            )
+            .unwrap();
+
+        // In the same transaction provide some synonyms
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_synonyms(btreemap! {
+                    "japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
+                });
+            })
+            .unwrap();
+        wtxn.commit().unwrap();
+
+        // Ensure synonyms are effectively stored
+        let rtxn = index.read_txn().unwrap();
+        let synonyms = index.synonyms(&rtxn).unwrap();
+        assert!(!synonyms.is_empty()); // at this point the index should return something
+
+        // Check that we can use synonyms
+        let result = index.search(&rtxn).query("japanese").execute().unwrap();
+        assert_eq!(result.documents_ids.len(), 2);
+    }
+
    #[test]
    fn setting_searchable_recomputes_other_settings() {
        let index = TempIndex::new();
--- a/permissive-json-pointer/src/lib.rs
+++ b/permissive-json-pointer/src/lib.rs
@@ -186,12 +186,16 @@ fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
                    let array = create_array(array, &sub_selectors);
                    if !array.is_empty() {
                        new_value.insert(key.to_string(), array.into());
+                    } else {
+                        new_value.insert(key.to_string(), Value::Array(vec![]));
                    }
                }
                Value::Object(object) => {
                    let object = create_value(object, sub_selectors);
                    if !object.is_empty() {
                        new_value.insert(key.to_string(), object.into());
+                    } else {
+                        new_value.insert(key.to_string(), Value::Object(Map::new()));
                    }
                }
                _ => (),
@@ -211,6 +215,8 @@ fn create_array(array: &[Value], selectors: &HashSet<&str>) -> Vec<Value> {
                let array = create_array(array, selectors);
                if !array.is_empty() {
                    res.push(array.into());
+                } else {
+                    res.push(Value::Array(vec![]));
                }
            }
            Value::Object(object) => {
@@ -637,6 +643,24 @@ mod tests {
        );
    }

+    #[test]
+    fn empty_array_object_return_empty() {
+        let value: Value = json!({
+            "array": [],
+            "object": {},
+        });
+        let value: &Document = value.as_object().unwrap();
+
+        let res: Value = select_values(value, vec!["array.name", "object.name"]).into();
+        assert_eq!(
+            res,
+            json!({
+                "array": [],
+                "object": {},
+            })
+        );
+    }
+
    #[test]
    fn all_conflict_variation() {
        let value: Value = json!({
Author	SHA1	Message	Date
Clément Renault	7f0fc8008c	Add a puffin profiling log in the sorter_into_reader function	2023-09-20 16:31:14 +02:00
Clément Renault	26efcc990b	Add more puffin logs to the deletion functions	2023-09-20 11:13:29 +02:00
Kerollmops	b126bf3aec	Temporary use the charabia git repository to get the latest fixes	2023-09-19 10:15:17 +02:00
Kerollmops	e82ff56416	Enable by default the puffin server	2023-09-18 18:12:16 +02:00
Clément Renault	1b26dde438	Expose a new flag to limit the number of batched tasks	2023-09-18 18:11:12 +02:00
Tamo	3bb644b54d	update the description of the cli argument	2023-09-18 18:10:46 +02:00
Clément Renault	34b9145db2	Fix the tests	2023-09-18 18:10:29 +02:00
meili-bors[bot]	76c05d1b20	Merge #4053 4053: Fix the stats of the documents deletion by filter r=Kerollmops a=irevoire # Pull Request The issue was that the operation « DocumentDeletionByFilter » was not declared as an index operation. That means the index stats were not reprocessed after the application of the operation. ## Related issue Fixes #4018 ## What does this PR do? - Move the `DocumentDeletionByFilter` internal operation into the category of the `IndexOperation`. This means that the stats will automatically be re-processed after a batch is processed. - Update a test to ensure that the stats are valid after each operation ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Tamo <tamo@meilisearch.com>	2023-09-11 15:53:26 +00:00
Tamo	34fac115d5	fix clippy	2023-09-11 17:15:57 +02:00
meili-bors[bot]	a09686fcbd	Merge #3997 3997: Refactor empty arrays/objects should return empty instead of null r=Kerollmops a=dogukanakkaya # Pull Request ## What does this PR do? At the moment if we select empty objects and array of object properties with dot notations like: ```json { "array": [], "object": {} } ``` ```rs GetDocumentOptions { fields: Some(vec!["array.name", "object.name"]) } ``` returns null if the array/object has no property yet. I am not sure if this is expected or it's the correct behaviour but I add my document with a property that is assigned to an empty array/object, later on when I select it, returns null which is kinda weird and unexpected in my opinion. This PR fixes that issue by returning an empty vector if the array is empty or an empty map if object is empty. This is not added for `permissive-json-pointer/src/lib.rs:224` because `create_array` loops over each item. Selecting a single property that is an object, in an array of objects would result other objects to be empty maps instead of none. ```json "doggos": [ { "jean": { "race": { "name": "bernese mountain", } } }, { "marc": { "age": 4, "race": { "name": "golden retriever", } } } ] ``` ```rs GetDocumentOptions { fields: Some(vec!["doggos.jean"]) } ``` Would result in `jean` object and an extra empty object for `marc`. ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: dogukanakkaya <doguakkaya27@hotmail.com>	2023-09-11 13:46:02 +00:00
dogukanakkaya	393be40179	Refactor empty arrays/objects should return empty instead of null	2023-09-11 15:56:15 +03:00
meili-bors[bot]	487d493f49	Merge #4043 4043: Bring back hotfixes from v1.3.3 into v1.4.0 r=Kerollmops a=curquiza Co-authored-by: curquiza <curquiza@users.noreply.github.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com> Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: curquiza <clementine@meilisearch.com>	2023-09-11 12:27:34 +00:00
Tamo	9258e5b5bf	Fix the stats of the documents deletion by filter The issue was that the operation « DocumentDeletionByFilter » was not declared as an index operation. That means the indexes stats were not reprocessed after the application of the operation.	2023-09-11 14:04:10 +02:00
meili-bors[bot]	462b4654c4	Merge #4028 4028: Fix highlighting bug when searching for a phrase with cropping r=ManyTheFish a=vivek-26 # Pull Request ## Related issue Fixes #3975 ## What does this PR do? This PR - - Fixes the bug where searching only for a phrase (containing multiple words) along with cropping, highlighted only the first word of the phrase. - Adds unit test case for the above mentioned scenario. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Vivek Kumar <vivek.26@outlook.com>	2023-09-11 07:58:41 +00:00
Vivek Kumar	abfa7ded25	use a new temp index in the test	2023-09-08 12:32:47 +05:30
Vivek Kumar	f2837aaec2	add another test case	2023-09-08 11:39:54 +05:30
Vivek Kumar	11df155598	fix highlighting bug when searching for a phrase with cropping	2023-09-08 11:39:52 +05:30
curquiza	651657c03e	Fix git conflicts	2023-09-07 16:48:13 +02:00
meili-bors[bot]	b9ad59c969	Merge #4041 4041: Register the swap indexe task in a spawn blocking to be sure to never… r=ManyTheFish a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4040 ## What does this PR do? - Register the swap indexes task in a spawn blocking task Co-authored-by: Tamo <tamo@meilisearch.com>	2023-09-07 10:22:01 +00:00
Tamo	66aa682e23	Register the swap indexe task in a spawn blocking to be sure to never block the main thread	2023-09-07 11:37:02 +02:00
meili-bors[bot]	256cf33bca	Merge #4039 4039: Fix multiple vectors dimensions r=ManyTheFish a=Kerollmops This PR fixes #4035, making providing multiple vectors in documents possible. This is fixed by extracting the vectors from the non-flattened version of the documents. Co-authored-by: Kerollmops <clement@meilisearch.com>	2023-09-07 09:25:58 +00:00
meili-bors[bot]	9945cbf9db	Merge #4038 4038: Fix filter escaping issues r=ManyTheFish a=Kerollmops This PR fixes #4034 by always escaping the sequences. Users must always put quotes (simple or double) to escape the filter values. Co-authored-by: Kerollmops <clement@meilisearch.com>	2023-09-06 12:29:29 +00:00
Kerollmops	03d0f628bd	Use the unescaper crate to unescape any char sequence	2023-09-06 13:59:45 +02:00
Kerollmops	ea78060916	Fix tests that were supposed to escape characters	2023-09-06 13:59:45 +02:00
Kerollmops	b42d48187a	Add a test case scenario	2023-09-06 13:59:44 +02:00
Kerollmops	679c0b0f97	Extract the vectors from the non-flattened version of the documents	2023-09-06 12:26:00 +02:00
Kerollmops	e02d0064bd	Add a test case scenario	2023-09-06 12:26:00 +02:00
meili-bors[bot]	7ef3572f11	Merge #4037 4037: Update version for the next release (v1.3.3) in Cargo.toml r=curquiza a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: curquiza <curquiza@users.noreply.github.com>	2023-09-06 09:50:58 +00:00
curquiza	93285041a9	Update version for the next release (v1.3.3) in Cargo.toml	2023-09-06 09:23:20 +00:00
meili-bors[bot]	dc3d9c90d9	Merge #3994 3994: Fix synonyms with separators r=Kerollmops a=ManyTheFish # Pull Request ## Related issue Fixes #3977 ## Available prototype ``` $ docker pull getmeili/meilisearch:prototype-fix-synonyms-with-separators-0 ``` ## What does this PR do? - add a new test - filter the empty synonyms after normalization Co-authored-by: ManyTheFish <many@meilisearch.com>	2023-09-05 14:42:46 +00:00
meili-bors[bot]	287cf25d39	Merge #4033 4033: Fix thai synonyms r=Kerollmops a=Kerollmops Fixes #4031 Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>	2023-09-05 13:54:33 +00:00
ManyTheFish	66aa6d5871	Ignore tokens with empty normalized value during indexing process	2023-09-05 15:44:14 +02:00
Kerollmops	8ac5b765bc	Fix synonyms normalization	2023-09-04 16:12:48 +02:00
meili-bors[bot]	cea93e9a37	Merge #4016 4016: Define the full Homebrew formula path r=curquiza a=Kerollmops This PR fixes #4015 by defining the full Homebrew formula path. Co-authored-by: Clément Renault <clement@meilisearch.com>	2023-09-04 13:10:28 +00:00
Kerollmops	085aad0a94	Add a test	2023-09-04 14:39:33 +02:00
Clément Renault	6db80b0836	Define the full Homebrew formula path	2023-08-24 11:24:47 +02:00
ManyTheFish	8dc5acf998	Try fix	2023-08-08 16:52:36 +02:00
ManyTheFish	fc2590fc9d	Add a test	2023-08-08 16:43:08 +02:00