removed unnecessary borrow call

2025-12-02 10:45:36 +00:00 · 2023-08-10 12:02:07 +02:00
70 changed files with 781 additions and 3058 deletions
--- a/.github/workflows/publish-apt-brew-pkg.yml
+++ b/.github/workflows/publish-apt-brew-pkg.yml
@@ -53,6 +53,5 @@ jobs:
        uses: mislav/bump-homebrew-formula-action@v2
        with:
          formula-name: meilisearch
-          formula-path: Formula/m/meilisearch.rb
        env:
          COMMITTER_TOKEN: ${{ secrets.HOMEBREW_COMMITTER_TOKEN }}
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@@ -161,7 +161,7 @@ jobs:
      - uses: actions-rs/toolchain@v1
        with:
          profile: minimal
-          toolchain: 1.71.1
+          toolchain: 1.69.0
          override: true
          components: clippy
      - name: Cache dependencies
--- a/Cargo.lock
+++ b/Cargo.lock
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ members = [
 ]

 [workspace.package]
-version = "1.4.1"
+version = "1.3.0"
 authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
 description = "Meilisearch HTTP server"
 homepage = "https://meilisearch.com"
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@@ -262,9 +262,6 @@ pub(crate) mod test {
            sortable_attributes: Setting::Set(btreeset! { S("age") }),
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
-            non_separator_tokens: Setting::NotSet,
-            separator_tokens: Setting::NotSet,
-            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@@ -340,9 +340,6 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                }
            },
            stop_words: settings.stop_words.into(),
-            non_separator_tokens: v6::Setting::NotSet,
-            separator_tokens: v6::Setting::NotSet,
-            dictionary: v6::Setting::NotSet,
            synonyms: settings.synonyms.into(),
            distinct_attribute: settings.distinct_attribute.into(),
            typo_tolerance: match settings.typo_tolerance {
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-10.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-10.snap
@@ -1,24 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: spells.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-4.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-4.snap
@@ -1,38 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: products.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {
-    "android": [
-      "phone",
-      "smartphone"
-    ],
-    "iphone": [
-      "phone",
-      "smartphone"
-    ],
-    "phone": [
-      "android",
-      "iphone",
-      "smartphone"
-    ]
-  },
-  "distinctAttribute": null
-}
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-7.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-7.snap
@@ -1,31 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: movies.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [
-    "genres",
-    "id"
-  ],
-  "sortableAttributes": [
-    "genres",
-    "id"
-  ],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness",
-    "release_date:asc"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
--- a/filter-parser/Cargo.toml
+++ b/filter-parser/Cargo.toml
@@ -14,7 +14,6 @@ license.workspace = true
 [dependencies]
 nom = "7.1.3"
 nom_locate = "4.1.0"
-unescaper = "0.1.2"

 [dev-dependencies]
 insta = "1.29.0"
--- a/filter-parser/src/error.rs
+++ b/filter-parser/src/error.rs
@@ -62,7 +62,6 @@ pub enum ErrorKind<'a> {
    MisusedGeoRadius,
    MisusedGeoBoundingBox,
    InvalidPrimary,
-    InvalidEscapedNumber,
    ExpectedEof,
    ExpectedValue(ExpectedValueKind),
    MalformedValue,
@@ -148,9 +147,6 @@ impl<'a> Display for Error<'a> {
                let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
                writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
            }
-            ErrorKind::InvalidEscapedNumber => {
-                writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?
-            }
            ErrorKind::ExpectedEof => {
                writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
            }
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -545,8 +545,6 @@ impl<'a> std::fmt::Display for Token<'a> {

 #[cfg(test)]
 pub mod tests {
-    use FilterCondition as Fc;
-
    use super::*;

    /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
@@ -558,22 +556,14 @@ pub mod tests {
        unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
    }

-    fn p(s: &str) -> impl std::fmt::Display + '_ {
-        Fc::parse(s).unwrap().unwrap()
-    }
-
-    #[test]
-    fn parse_escaped() {
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\'"#), @r#"{title} = {foo\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\'"#), @r#"{title} = {foo\\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\'"#), @r#"{title} = {foo\\\}"#);
-        insta::assert_display_snapshot!(p(r#"title = 'foo\\\\\\\\'"#), @r#"{title} = {foo\\\\}"#);
-        // but it also works with other sequencies
-        insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
-    }
-
    #[test]
    fn parse() {
+        use FilterCondition as Fc;
+
+        fn p(s: &str) -> impl std::fmt::Display + '_ {
+            Fc::parse(s).unwrap().unwrap()
+        }
+
        // Test equal
        insta::assert_display_snapshot!(p("channel = Ponce"), @"{channel} = {Ponce}");
        insta::assert_display_snapshot!(p("subscribers = 12"), @"{subscribers} = {12}");
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -171,24 +171,7 @@ pub fn parse_value(input: Span) -> IResult<Token> {
        })
    })?;

-    match unescaper::unescape(value.value()) {
-        Ok(content) => {
-            if content.len() != value.value().len() {
-                Ok((input, Token::new(value.original_span(), Some(content))))
-            } else {
-                Ok((input, value))
-            }
-        }
-        Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
-        Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
-            value.original_span(),
-            ErrorKind::InvalidEscapedNumber,
-        ))),
-        Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
-            value.original_span(),
-            ErrorKind::MalformedValue,
-        ))),
-    }
+    Ok((input, value))
 }

 fn is_value_component(c: char) -> bool {
@@ -335,17 +318,17 @@ pub mod test {
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
-            (r#""\\""#, r#"\"#, true),
-            (r#""\\\\\\""#, r#"\\\"#, true),
-            (r#""aa\\aa""#, r#"aa\aa"#, true),
+            (r#""\\""#, r#"\\"#, false),
+            (r#""\\\\\\""#, r#"\\\\\\"#, false),
+            (r#""aa\\aa""#, r#"aa\\aa"#, false),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
-            (r#""Hello \\\"world\\\"""#, r#"Hello \"world\""#, true),
+            (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
-            (r#"'Hello \\\'world\\\''"#, r#"Hello \'world\'"#, true),
+            (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
            (r#"'\'\''"#, r#"''"#, true),
        ];
@@ -367,14 +350,7 @@ pub mod test {
                "Filter `{}` was not supposed to be escaped",
                input
            );
-            assert_eq!(
-                token.value(),
-                expected,
-                "Filter `{}` failed by giving `{}` instead of `{}`.",
-                input,
-                token.value(),
-                expected
-            );
+            assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
        }
    }

--- a/fuzzers/Cargo.toml
+++ b/fuzzers/Cargo.toml
@@ -13,7 +13,7 @@ license.workspace = true
 [dependencies]
 arbitrary = { version = "1.3.0", features = ["derive"] }
 clap = { version = "4.3.0", features = ["derive"] }
-fastrand = "2.0.0"
+fastrand = "1.9.0"
 milli = { path = "../milli" }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -67,6 +67,10 @@ pub(crate) enum Batch {
        op: IndexOperation,
        must_create_index: bool,
    },
+    IndexDocumentDeletionByFilter {
+        index_uid: String,
+        task: Task,
+    },
    IndexCreation {
        index_uid: String,
        primary_key: Option<String>,
@@ -110,10 +114,6 @@ pub(crate) enum IndexOperation {
        documents: Vec<Vec<String>>,
        tasks: Vec<Task>,
    },
-    IndexDocumentDeletionByFilter {
-        index_uid: String,
-        task: Task,
-    },
    DocumentClear {
        index_uid: String,
        tasks: Vec<Task>,
@@ -155,6 +155,7 @@ impl Batch {
            | Batch::TaskDeletion(task)
            | Batch::Dump(task)
            | Batch::IndexCreation { task, .. }
+            | Batch::IndexDocumentDeletionByFilter { task, .. }
            | Batch::IndexUpdate { task, .. } => vec![task.uid],
            Batch::SnapshotCreation(tasks) | Batch::IndexDeletion { tasks, .. } => {
                tasks.iter().map(|task| task.uid).collect()
@@ -166,7 +167,6 @@ impl Batch {
                | IndexOperation::DocumentClear { tasks, .. } => {
                    tasks.iter().map(|task| task.uid).collect()
                }
-                IndexOperation::IndexDocumentDeletionByFilter { task, .. } => vec![task.uid],
                IndexOperation::SettingsAndDocumentOperation {
                    document_import_tasks: tasks,
                    settings_tasks: other,
@@ -194,7 +194,8 @@ impl Batch {
            IndexOperation { op, .. } => Some(op.index_uid()),
            IndexCreation { index_uid, .. }
            | IndexUpdate { index_uid, .. }
-            | IndexDeletion { index_uid, .. } => Some(index_uid),
+            | IndexDeletion { index_uid, .. }
+            | IndexDocumentDeletionByFilter { index_uid, .. } => Some(index_uid),
        }
    }
 }
@@ -204,7 +205,6 @@ impl IndexOperation {
        match self {
            IndexOperation::DocumentOperation { index_uid, .. }
            | IndexOperation::DocumentDeletion { index_uid, .. }
-            | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
            | IndexOperation::DocumentClear { index_uid, .. }
            | IndexOperation::Settings { index_uid, .. }
            | IndexOperation::DocumentClearAndSetting { index_uid, .. }
@@ -239,12 +239,9 @@ impl IndexScheduler {
                let task = self.get_task(rtxn, id)?.ok_or(Error::CorruptedTaskQueue)?;
                match &task.kind {
                    KindWithContent::DocumentDeletionByFilter { index_uid, .. } => {
-                        Ok(Some(Batch::IndexOperation {
-                            op: IndexOperation::IndexDocumentDeletionByFilter {
-                                index_uid: index_uid.clone(),
-                                task,
-                            },
-                            must_create_index: false,
+                        Ok(Some(Batch::IndexDocumentDeletionByFilter {
+                            index_uid: index_uid.clone(),
+                            task,
                        }))
                    }
                    _ => unreachable!(),
@@ -899,6 +896,51 @@ impl IndexScheduler {

                Ok(tasks)
            }
+            Batch::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
+                let (index_uid, filter) =
+                    if let KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr } =
+                        &task.kind
+                    {
+                        (index_uid, filter_expr)
+                    } else {
+                        unreachable!()
+                    };
+                let index = {
+                    let rtxn = self.env.read_txn()?;
+                    self.index_mapper.index(&rtxn, index_uid)?
+                };
+                let deleted_documents = delete_document_by_filter(filter, index);
+                let original_filter = if let Some(Details::DocumentDeletionByFilter {
+                    original_filter,
+                    deleted_documents: _,
+                }) = task.details
+                {
+                    original_filter
+                } else {
+                    // In the case of a `documentDeleteByFilter` the details MUST be set
+                    unreachable!();
+                };
+
+                match deleted_documents {
+                    Ok(deleted_documents) => {
+                        task.status = Status::Succeeded;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(deleted_documents),
+                        });
+                    }
+                    Err(e) => {
+                        task.status = Status::Failed;
+                        task.details = Some(Details::DocumentDeletionByFilter {
+                            original_filter,
+                            deleted_documents: Some(0),
+                        });
+                        task.error = Some(e.into());
+                    }
+                }
+
+                Ok(vec![task])
+            }
            Batch::IndexCreation { index_uid, primary_key, task } => {
                let wtxn = self.env.write_txn()?;
                if self.index_mapper.exists(&wtxn, &index_uid)? {
@@ -1257,47 +1299,6 @@ impl IndexScheduler {

                Ok(tasks)
            }
-            IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
-                let filter =
-                    if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
-                        &task.kind
-                    {
-                        filter_expr
-                    } else {
-                        unreachable!()
-                    };
-                let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
-                let original_filter = if let Some(Details::DocumentDeletionByFilter {
-                    original_filter,
-                    deleted_documents: _,
-                }) = task.details
-                {
-                    original_filter
-                } else {
-                    // In the case of a `documentDeleteByFilter` the details MUST be set
-                    unreachable!();
-                };
-
-                match deleted_documents {
-                    Ok(deleted_documents) => {
-                        task.status = Status::Succeeded;
-                        task.details = Some(Details::DocumentDeletionByFilter {
-                            original_filter,
-                            deleted_documents: Some(deleted_documents),
-                        });
-                    }
-                    Err(e) => {
-                        task.status = Status::Failed;
-                        task.details = Some(Details::DocumentDeletionByFilter {
-                            original_filter,
-                            deleted_documents: Some(0),
-                        });
-                        task.error = Some(e.into());
-                    }
-                }
-
-                Ok(vec![task])
-            }
            IndexOperation::Settings { index_uid: _, settings, mut tasks } => {
                let indexer_config = self.index_mapper.indexer_config();
                let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@@ -1497,22 +1498,23 @@ impl IndexScheduler {
    }
 }

-fn delete_document_by_filter<'a>(
-    wtxn: &mut RwTxn<'a, '_>,
-    filter: &serde_json::Value,
-    index: &'a Index,
-) -> Result<u64> {
+fn delete_document_by_filter(filter: &serde_json::Value, index: Index) -> Result<u64> {
    let filter = Filter::from_json(filter)?;
    Ok(if let Some(filter) = filter {
-        let candidates = filter.evaluate(wtxn, index).map_err(|err| match err {
+        let mut wtxn = index.write_txn()?;
+
+        let candidates = filter.evaluate(&wtxn, &index).map_err(|err| match err {
            milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
                Error::from(err).with_custom_error_code(Code::InvalidDocumentFilter)
            }
            e => e.into(),
        })?;
-        let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
+        let mut delete_operation = DeleteDocuments::new(&mut wtxn, &index)?;
        delete_operation.delete_documents(&candidates);
-        delete_operation.execute().map(|result| result.deleted_documents)?
+        let deleted_documents =
+            delete_operation.execute().map(|result| result.deleted_documents)?;
+        wtxn.commit()?;
+        deleted_documents
    } else {
        0
    })
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -790,19 +790,10 @@ impl IndexScheduler {

        let mut res = BTreeMap::new();

-        let processing_tasks = { self.processing_tasks.read().unwrap().processing.len() };
-
        res.insert(
            "statuses".to_string(),
            enum_iterator::all::<Status>()
-                .map(|s| {
-                    let tasks = self.get_status(&rtxn, s)?.len();
-                    match s {
-                        Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
-                        Status::Processing => Ok((s.to_string(), processing_tasks)),
-                        s => Ok((s.to_string(), tasks)),
-                    }
-                })
+                .map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
                .collect::<Result<BTreeMap<String, u64>>>()?,
        );
        res.insert(
@@ -4140,154 +4131,4 @@ mod tests {
        snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
        drop(rtxn);
    }
-
-    #[test]
-    fn basic_get_stats() {
-        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
-
-        let kind = index_creation_task("catto", "mouse");
-        let _task = index_scheduler.register(kind).unwrap();
-        let kind = index_creation_task("doggo", "sheep");
-        let _task = index_scheduler.register(kind).unwrap();
-        let kind = index_creation_task("whalo", "fish");
-        let _task = index_scheduler.register(kind).unwrap();
-
-        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
-        {
-          "indexes": {
-            "catto": 1,
-            "doggo": 1,
-            "whalo": 1
-          },
-          "statuses": {
-            "canceled": 0,
-            "enqueued": 3,
-            "failed": 0,
-            "processing": 0,
-            "succeeded": 0
-          },
-          "types": {
-            "documentAdditionOrUpdate": 0,
-            "documentDeletion": 0,
-            "dumpCreation": 0,
-            "indexCreation": 3,
-            "indexDeletion": 0,
-            "indexSwap": 0,
-            "indexUpdate": 0,
-            "settingsUpdate": 0,
-            "snapshotCreation": 0,
-            "taskCancelation": 0,
-            "taskDeletion": 0
-          }
-        }
-        "###);
-
-        handle.advance_till([Start, BatchCreated]);
-        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
-        {
-          "indexes": {
-            "catto": 1,
-            "doggo": 1,
-            "whalo": 1
-          },
-          "statuses": {
-            "canceled": 0,
-            "enqueued": 2,
-            "failed": 0,
-            "processing": 1,
-            "succeeded": 0
-          },
-          "types": {
-            "documentAdditionOrUpdate": 0,
-            "documentDeletion": 0,
-            "dumpCreation": 0,
-            "indexCreation": 3,
-            "indexDeletion": 0,
-            "indexSwap": 0,
-            "indexUpdate": 0,
-            "settingsUpdate": 0,
-            "snapshotCreation": 0,
-            "taskCancelation": 0,
-            "taskDeletion": 0
-          }
-        }
-        "###);
-
-        handle.advance_till([
-            InsideProcessBatch,
-            InsideProcessBatch,
-            ProcessBatchSucceeded,
-            AfterProcessing,
-            Start,
-            BatchCreated,
-        ]);
-        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
-        {
-          "indexes": {
-            "catto": 1,
-            "doggo": 1,
-            "whalo": 1
-          },
-          "statuses": {
-            "canceled": 0,
-            "enqueued": 1,
-            "failed": 0,
-            "processing": 1,
-            "succeeded": 1
-          },
-          "types": {
-            "documentAdditionOrUpdate": 0,
-            "documentDeletion": 0,
-            "dumpCreation": 0,
-            "indexCreation": 3,
-            "indexDeletion": 0,
-            "indexSwap": 0,
-            "indexUpdate": 0,
-            "settingsUpdate": 0,
-            "snapshotCreation": 0,
-            "taskCancelation": 0,
-            "taskDeletion": 0
-          }
-        }
-        "###);
-
-        // now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
-        handle.advance_till([
-            InsideProcessBatch,
-            InsideProcessBatch,
-            ProcessBatchSucceeded,
-            AfterProcessing,
-            Start,
-            BatchCreated,
-        ]);
-        snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
-        {
-          "indexes": {
-            "catto": 1,
-            "doggo": 1,
-            "whalo": 1
-          },
-          "statuses": {
-            "canceled": 0,
-            "enqueued": 0,
-            "failed": 0,
-            "processing": 1,
-            "succeeded": 2
-          },
-          "types": {
-            "documentAdditionOrUpdate": 0,
-            "documentDeletion": 0,
-            "dumpCreation": 0,
-            "indexCreation": 3,
-            "indexDeletion": 0,
-            "indexSwap": 0,
-            "indexUpdate": 0,
-            "settingsUpdate": 0,
-            "snapshotCreation": 0,
-            "taskCancelation": 0,
-            "taskDeletion": 0
-          }
-        }
-        "###);
-    }
 }
--- a/meili-snap/src/lib.rs
+++ b/meili-snap/src/lib.rs
@@ -167,9 +167,7 @@ macro_rules! snapshot {
        let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
        settings.bind(|| {
            let snap = format!("{}", $value);
-            insta::allow_duplicates! {
-                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
-            }
+            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
        });
    };
    ($value:expr, @$inline:literal) => {
@@ -178,9 +176,7 @@ macro_rules! snapshot {
        let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
        settings.bind(|| {
            let snap = format!("{}", $value);
-            insta::allow_duplicates! {
-                meili_snap::insta::assert_snapshot!(snap, @$inline);
-            }
+            meili_snap::insta::assert_snapshot!(snap, @$inline);
        });
    };
    ($value:expr) => {
@@ -198,9 +194,7 @@ macro_rules! snapshot {
        let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
        settings.bind(|| {
            let snap = format!("{}", $value);
-            insta::allow_duplicates! {
-                meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
-            }
+            meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
        });
    };
 }
--- a/meilisearch-types/Cargo.toml
+++ b/meilisearch-types/Cargo.toml
@@ -15,13 +15,13 @@ actix-web = { version = "4.3.1", default-features = false }
 anyhow = "1.0.70"
 convert_case = "0.6.0"
 csv = "1.2.1"
-deserr = { version = "0.6.0", features = ["actix-web"]}
+deserr = "0.5.0"
 either = { version = "1.8.1", features = ["serde"] }
 enum-iterator = "1.4.0"
 file-store = { path = "../file-store" }
 flate2 = "1.0.25"
 fst = "0.4.7"
-memmap2 = "0.7.1"
+memmap2 = "0.5.10"
 milli = { path = "../milli" }
 roaring = { version = "0.10.1", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -259,9 +259,6 @@ InvalidSettingsRankingRules           , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSearchableAttributes   , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSortableAttributes     , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsStopWords              , InvalidRequest       , BAD_REQUEST ;
-InvalidSettingsNonSeparatorTokens     , InvalidRequest       , BAD_REQUEST ;
-InvalidSettingsSeparatorTokens        , InvalidRequest       , BAD_REQUEST ;
-InvalidSettingsDictionary             , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSynonyms               , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsTypoTolerance          , InvalidRequest       , BAD_REQUEST ;
 InvalidState                          , Internal             , INTERNAL_SERVER_ERROR ;
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -171,15 +171,6 @@ pub struct Settings<T> {
    #[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
-    pub non_separator_tokens: Setting<BTreeSet<String>>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
-    pub separator_tokens: Setting<BTreeSet<String>>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
-    #[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
-    pub dictionary: Setting<BTreeSet<String>>,
-    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@@ -210,9 +201,6 @@ impl Settings<Checked> {
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
-            non_separator_tokens: Setting::Reset,
-            separator_tokens: Setting::Reset,
-            dictionary: Setting::Reset,
            distinct_attribute: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
@@ -229,9 +217,6 @@ impl Settings<Checked> {
            sortable_attributes,
            ranking_rules,
            stop_words,
-            non_separator_tokens,
-            separator_tokens,
-            dictionary,
            synonyms,
            distinct_attribute,
            typo_tolerance,
@@ -247,9 +232,6 @@ impl Settings<Checked> {
            sortable_attributes,
            ranking_rules,
            stop_words,
-            non_separator_tokens,
-            separator_tokens,
-            dictionary,
            synonyms,
            distinct_attribute,
            typo_tolerance,
@@ -292,9 +274,6 @@ impl Settings<Unchecked> {
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
-            non_separator_tokens: self.non_separator_tokens,
-            separator_tokens: self.separator_tokens,
-            dictionary: self.dictionary,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
@@ -356,28 +335,6 @@ pub fn apply_settings_to_builder(
        Setting::NotSet => (),
    }

-    match settings.non_separator_tokens {
-        Setting::Set(ref non_separator_tokens) => {
-            builder.set_non_separator_tokens(non_separator_tokens.clone())
-        }
-        Setting::Reset => builder.reset_non_separator_tokens(),
-        Setting::NotSet => (),
-    }
-
-    match settings.separator_tokens {
-        Setting::Set(ref separator_tokens) => {
-            builder.set_separator_tokens(separator_tokens.clone())
-        }
-        Setting::Reset => builder.reset_separator_tokens(),
-        Setting::NotSet => (),
-    }
-
-    match settings.dictionary {
-        Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
-        Setting::Reset => builder.reset_dictionary(),
-        Setting::NotSet => (),
-    }
-
    match settings.synonyms {
        Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
        Setting::Reset => builder.reset_synonyms(),
@@ -502,14 +459,15 @@ pub fn settings(
        })
        .transpose()?
        .unwrap_or_default();
-
-    let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
-    let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
-    let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
-
    let distinct_field = index.distinct_field(rtxn)?.map(String::from);

-    let synonyms = index.user_defined_synonyms(rtxn)?;
+    // In milli, each word in the synonyms map was split on its separators. Since we lost
+    // this information, we are going to put a space between words.
+    let synonyms = index
+        .synonyms(rtxn)?
+        .iter()
+        .map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
+        .collect();

    let min_typo_word_len = MinWordSizeTyposSetting {
        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
@@ -562,9 +520,6 @@ pub fn settings(
        sortable_attributes: Setting::Set(sortable_attributes),
        ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
        stop_words: Setting::Set(stop_words),
-        non_separator_tokens: Setting::Set(non_separator_tokens),
-        separator_tokens: Setting::Set(separator_tokens),
-        dictionary: Setting::Set(dictionary),
        distinct_attribute: match distinct_field {
            Some(field) => Setting::Set(field),
            None => Setting::Reset,
@@ -687,9 +642,6 @@ pub(crate) mod test {
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
-            non_separator_tokens: Setting::NotSet,
-            separator_tokens: Setting::NotSet,
-            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
@@ -711,9 +663,6 @@ pub(crate) mod test {
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
-            non_separator_tokens: Setting::NotSet,
-            separator_tokens: Setting::NotSet,
-            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
--- a/meilisearch/Cargo.toml
+++ b/meilisearch/Cargo.toml
@@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
 bytes = "1.4.0"
 clap = { version = "4.2.1", features = ["derive", "env"] }
 crossbeam-channel = "0.5.8"
-deserr = { version = "0.6.0", features = ["actix-web"]}
+deserr = "0.5.0"
 dump = { path = "../dump" }
 either = "1.8.1"
 env_logger = "0.10.0"
@@ -50,9 +50,9 @@ futures = "0.3.28"
 futures-util = "0.3.28"
 http = "0.2.9"
 index-scheduler = { path = "../index-scheduler" }
-indexmap = { version = "2.0.0", features = ["serde"] }
+indexmap = { version = "1.9.3", features = ["serde-1"] }
 is-terminal = "0.4.8"
-itertools = "0.11.0"
+itertools = "0.10.5"
 jsonwebtoken = "8.3.0"
 lazy_static = "1.4.0"
 log = "0.4.17"
@@ -87,7 +87,7 @@ sha2 = "0.10.6"
 siphasher = "0.3.10"
 slice-group-by = "0.3.0"
 static-files = { version = "0.2.3", optional = true }
-sysinfo = "0.29.7"
+sysinfo = "0.28.4"
 tar = "0.4.38"
 tempfile = "3.5.0"
 thiserror = "1.0.40"
--- a/meilisearch/src/analytics/mock_analytics.rs
+++ b/meilisearch/src/analytics/mock_analytics.rs
@@ -20,7 +20,7 @@ pub struct SearchAggregator;
 #[allow(dead_code)]
 impl SearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
-        Self
+        Self::default()
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
@@ -32,7 +32,7 @@ pub struct MultiSearchAggregator;
 #[allow(dead_code)]
 impl MultiSearchAggregator {
    pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
-        Self
+        Self::default()
    }

    pub fn succeed(&mut self) {}
@@ -44,7 +44,7 @@ pub struct FacetSearchAggregator;
 #[allow(dead_code)]
 impl FacetSearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
-        Self
+        Self::default()
    }

    pub fn succeed(&mut self, _: &dyn Any) {}
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -310,81 +310,6 @@ make_setting_route!(
    }
 );

-make_setting_route!(
-    "/non-separator-tokens",
-    put,
-    std::collections::BTreeSet<String>,
-    meilisearch_types::deserr::DeserrJsonError<
-        meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
-    >,
-    non_separator_tokens,
-    "nonSeparatorTokens",
-    analytics,
-    |non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
-        use serde_json::json;
-
-        analytics.publish(
-            "nonSeparatorTokens Updated".to_string(),
-            json!({
-                "non_separator_tokens": {
-                    "total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
-                },
-            }),
-            Some(req),
-        );
-    }
-);
-
-make_setting_route!(
-    "/separator-tokens",
-    put,
-    std::collections::BTreeSet<String>,
-    meilisearch_types::deserr::DeserrJsonError<
-        meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
-    >,
-    separator_tokens,
-    "separatorTokens",
-    analytics,
-    |separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
-        use serde_json::json;
-
-        analytics.publish(
-            "separatorTokens Updated".to_string(),
-            json!({
-                "separator_tokens": {
-                    "total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
-                },
-            }),
-            Some(req),
-        );
-    }
-);
-
-make_setting_route!(
-    "/dictionary",
-    put,
-    std::collections::BTreeSet<String>,
-    meilisearch_types::deserr::DeserrJsonError<
-        meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
-    >,
-    dictionary,
-    "dictionary",
-    analytics,
-    |dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
-        use serde_json::json;
-
-        analytics.publish(
-            "dictionary Updated".to_string(),
-            json!({
-                "dictionary": {
-                    "total": dictionary.as_ref().map(|dictionary| dictionary.len()),
-                },
-            }),
-            Some(req),
-        );
-    }
-);
-
 make_setting_route!(
    "/synonyms",
    put,
@@ -541,9 +466,6 @@ generate_configure!(
    searchable_attributes,
    distinct_attribute,
    stop_words,
-    separator_tokens,
-    non_separator_tokens,
-    dictionary,
    synonyms,
    ranking_rules,
    typo_tolerance,
--- a/meilisearch/src/routes/swap_indexes.rs
+++ b/meilisearch/src/routes/swap_indexes.rs
@@ -60,7 +60,8 @@ pub async fn swap_indexes(
    }

    let task = KindWithContent::IndexSwap { swaps };
-    let task: SummarizedTaskView =
-        tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
+
+    let task = index_scheduler.register(task)?;
+    let task: SummarizedTaskView = task.into();
    Ok(HttpResponse::Accepted().json(task))
 }
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -491,20 +491,6 @@ pub fn perform_search(
        tokenizer_builder.allow_list(&script_lang_map);
    }

-    let separators = index.allowed_separators(&rtxn)?;
-    let separators: Option<Vec<_>> =
-        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
-    if let Some(ref separators) = separators {
-        tokenizer_builder.separators(separators);
-    }
-
-    let dictionary = index.dictionary(&rtxn)?;
-    let dictionary: Option<Vec<_>> =
-        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
-    if let Some(ref dictionary) = dictionary {
-        tokenizer_builder.words_dict(dictionary);
-    }
-
    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
    formatter_builder.crop_marker(query.crop_marker);
    formatter_builder.highlight_prefix(query.highlight_pre_tag);
@@ -680,7 +666,6 @@ fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option
        .map_err(InternalError::SerdeJson)?;
    Ok(vectors
        .into_iter()
-        .flatten()
        .map(|v| OrderedFloat(dot_product_similarity(query, &v)))
        .max()
        .map(OrderedFloat::into_inner))
--- a/meilisearch/tests/documents/delete_documents.rs
+++ b/meilisearch/tests/documents/delete_documents.rs
@@ -154,19 +154,6 @@ async fn delete_document_by_filter() {
        )
        .await;
    index.wait_task(1).await;
-
-    let (stats, _) = index.stats().await;
-    snapshot!(json_string!(stats), @r###"
-    {
-      "numberOfDocuments": 4,
-      "isIndexing": false,
-      "fieldDistribution": {
-        "color": 3,
-        "id": 4
-      }
-    }
-    "###);
-
    let (response, code) =
        index.delete_document_by_filter(json!({ "filter": "color = blue"})).await;
    snapshot!(code, @"202 Accepted");
@@ -201,18 +188,6 @@ async fn delete_document_by_filter() {
    }
    "###);

-    let (stats, _) = index.stats().await;
-    snapshot!(json_string!(stats), @r###"
-    {
-      "numberOfDocuments": 2,
-      "isIndexing": false,
-      "fieldDistribution": {
-        "color": 1,
-        "id": 2
-      }
-    }
-    "###);
-
    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(documents), @r###"
@@ -266,18 +241,6 @@ async fn delete_document_by_filter() {
    }
    "###);

-    let (stats, _) = index.stats().await;
-    snapshot!(json_string!(stats), @r###"
-    {
-      "numberOfDocuments": 1,
-      "isIndexing": false,
-      "fieldDistribution": {
-        "color": 1,
-        "id": 1
-      }
-    }
-    "###);
-
    let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
    snapshot!(json_string!(documents), @r###"
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
--- a/meilisearch/tests/search/distinct.rs
+++ b/meilisearch/tests/search/distinct.rs
@@ -1,63 +0,0 @@
-use meili_snap::snapshot;
-use once_cell::sync::Lazy;
-use serde_json::{json, Value};
-
-use crate::common::Server;
-
-pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
-    json!([
-        {"productId": 1, "shopId": 1},
-        {"productId": 2, "shopId": 1},
-        {"productId": 3, "shopId": 2},
-        {"productId": 4, "shopId": 2},
-        {"productId": 5, "shopId": 3},
-        {"productId": 6, "shopId": 3},
-        {"productId": 7, "shopId": 4},
-        {"productId": 8, "shopId": 4},
-        {"productId": 9, "shopId": 5},
-        {"productId": 10, "shopId": 5}
-    ])
-});
-
-pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
-pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
-
-/// testing: https://github.com/meilisearch/meilisearch/issues/4078
-#[actix_rt::test]
-async fn distinct_search_with_offset_no_ranking() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = DOCUMENTS.clone();
-    index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
-    index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
-    index.wait_task(1).await;
-
-    fn get_hits(response: Value) -> Vec<i64> {
-        let hits_array = response["hits"].as_array().unwrap();
-        hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
-    }
-
-    let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
-    let hits = get_hits(response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @"[1, 2]");
-
-    let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
-    let hits = get_hits(response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"2");
-    snapshot!(format!("{:?}", hits), @"[3, 4]");
-
-    let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
-    let hits = get_hits(response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"1");
-    snapshot!(format!("{:?}", hits), @"[5]");
-
-    let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
-    let hits = get_hits(response);
-    snapshot!(code, @"200 OK");
-    snapshot!(hits.len(), @"0");
-}
--- a/meilisearch/tests/search/geo.rs
+++ b/meilisearch/tests/search/geo.rs
@@ -1,4 +1,3 @@
-use meili_snap::{json_string, snapshot};
 use once_cell::sync::Lazy;
 use serde_json::{json, Value};

@@ -61,59 +60,3 @@ async fn geo_sort_with_geo_strings() {
        )
        .await;
 }
-
-#[actix_rt::test]
-async fn geo_bounding_box_with_string_and_number() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let documents = DOCUMENTS.clone();
-    index.update_settings_filterable_attributes(json!(["_geo"])).await;
-    index.update_settings_sortable_attributes(json!(["_geo"])).await;
-    index.add_documents(documents, None).await;
-    index.wait_task(2).await;
-
-    index
-        .search(
-            json!({
-                "filter": "_geoBoundingBox([89, 179], [-89, -179])",
-            }),
-            |response, code| {
-                assert_eq!(code, 200, "{}", response);
-                snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
-                {
-                  "hits": [
-                    {
-                      "id": 1,
-                      "name": "Taco Truck",
-                      "address": "444 Salsa Street, Burritoville",
-                      "type": "Mexican",
-                      "rating": 9,
-                      "_geo": {
-                        "lat": 34.0522,
-                        "lng": -118.2437
-                      }
-                    },
-                    {
-                      "id": 2,
-                      "name": "La Bella Italia",
-                      "address": "456 Elm Street, Townsville",
-                      "type": "Italian",
-                      "rating": 9,
-                      "_geo": {
-                        "lat": "45.4777599",
-                        "lng": "9.1967508"
-                      }
-                    }
-                  ],
-                  "query": "",
-                  "processingTimeMs": "[time]",
-                  "limit": 20,
-                  "offset": 0,
-                  "estimatedTotalHits": 2
-                }
-                "###);
-            },
-        )
-        .await;
-}
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -1,7 +1,6 @@
 // This modules contains all the test concerning search. Each particular feature of the search
 // should be tested in its own module to isolate tests and keep the tests readable.

-mod distinct;
 mod errors;
 mod facet_search;
 mod formatted;
@@ -1105,59 +1104,3 @@ async fn camelcased_words() {
        })
        .await;
 }
-
-#[actix_rt::test]
-async fn simple_search_with_strange_synonyms() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
-    let r = index.wait_task(0).await;
-    meili_snap::snapshot!(r["status"], @r###""succeeded""###);
-
-    let documents = DOCUMENTS.clone();
-    index.add_documents(documents, None).await;
-    index.wait_task(1).await;
-
-    index
-        .search(json!({"q": "How to train"}), |response, code| {
-            meili_snap::snapshot!(code, @"200 OK");
-            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
-            [
-              {
-                "title": "How to Train Your Dragon: The Hidden World",
-                "id": "166428"
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "How & train"}), |response, code| {
-            meili_snap::snapshot!(code, @"200 OK");
-            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
-            [
-              {
-                "title": "How to Train Your Dragon: The Hidden World",
-                "id": "166428"
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "to"}), |response, code| {
-            meili_snap::snapshot!(code, @"200 OK");
-            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
-            [
-              {
-                "title": "How to Train Your Dragon: The Hidden World",
-                "id": "166428"
-              }
-            ]
-            "###);
-        })
-        .await;
-}
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@@ -16,9 +16,6 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
        json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
    );
    map.insert("stop_words", json!([]));
-    map.insert("non_separator_tokens", json!([]));
-    map.insert("separator_tokens", json!([]));
-    map.insert("dictionary", json!([]));
    map.insert("synonyms", json!({}));
    map.insert(
        "faceting",
@@ -54,7 +51,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 14);
+    assert_eq!(settings.keys().len(), 11);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@@ -65,9 +62,6 @@ async fn get_settings() {
        json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
    );
    assert_eq!(settings["stopWords"], json!([]));
-    assert_eq!(settings["nonSeparatorTokens"], json!([]));
-    assert_eq!(settings["separatorTokens"], json!([]));
-    assert_eq!(settings["dictionary"], json!([]));
    assert_eq!(
        settings["faceting"],
        json!({
@@ -278,9 +272,6 @@ test_setting_routes!(
    searchable_attributes put,
    distinct_attribute put,
    stop_words put,
-    separator_tokens put,
-    non_separator_tokens put,
-    dictionary put,
    ranking_rules put,
    synonyms put,
    pagination patch,
--- a/meilisearch/tests/settings/mod.rs
+++ b/meilisearch/tests/settings/mod.rs
@@ -1,4 +1,3 @@
 mod distinct;
 mod errors;
 mod get_settings;
-mod tokenizer_customization;
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -1,467 +0,0 @@
-use meili_snap::{json_string, snapshot};
-use serde_json::json;
-
-use crate::common::Server;
-
-#[actix_rt::test]
-async fn set_and_reset() {
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    let (_response, _code) = index
-        .update_settings(json!({
-            "nonSeparatorTokens": ["#", "&"],
-            "separatorTokens": ["&sep", "<br/>"],
-            "dictionary": ["J.R.R.", "J. R. R."],
-        }))
-        .await;
-    index.wait_task(0).await;
-
-    let (response, _) = index.settings().await;
-    snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
-    [
-      "#",
-      "&"
-    ]
-    "###);
-    snapshot!(json_string!(response["separatorTokens"]), @r###"
-    [
-      "&sep",
-      "<br/>"
-    ]
-    "###);
-    snapshot!(json_string!(response["dictionary"]), @r###"
-    [
-      "J. R. R.",
-      "J.R.R."
-    ]
-    "###);
-
-    index
-        .update_settings(json!({
-            "nonSeparatorTokens": null,
-            "separatorTokens": null,
-            "dictionary": null,
-        }))
-        .await;
-
-    index.wait_task(1).await;
-
-    let (response, _) = index.settings().await;
-    snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
-    snapshot!(json_string!(response["separatorTokens"]), @"[]");
-    snapshot!(json_string!(response["dictionary"]), @"[]");
-}
-
-#[actix_rt::test]
-async fn set_and_search() {
-    let documents = json!([
-        {
-            "id": 1,
-            "content": "Mac & cheese",
-        },
-        {
-            "id": 2,
-            "content": "G#D#G#D#G#C#D#G#C#",
-        },
-        {
-            "id": 3,
-            "content": "Mac&sep&&sepcheese",
-        },
-    ]);
-
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    index.add_documents(documents, None).await;
-    index.wait_task(0).await;
-
-    let (_response, _code) = index
-        .update_settings(json!({
-            "nonSeparatorTokens": ["#", "&"],
-            "separatorTokens": ["<br/>", "&sep"],
-            "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
-        }))
-        .await;
-    index.wait_task(1).await;
-
-    index
-        .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 1,
-                "content": "Mac & cheese",
-                "_formatted": {
-                  "id": "1",
-                  "content": "Mac <em>&</em> cheese"
-                }
-              },
-              {
-                "id": 3,
-                "content": "Mac&sep&&sepcheese",
-                "_formatted": {
-                  "id": "3",
-                  "content": "Mac&sep<em>&</em>&sepcheese"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(
-            json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
-            |response, code| {
-                snapshot!(code, @"200 OK");
-                snapshot!(json_string!(response["hits"]), @r###"
-                [
-                  {
-                    "id": 1,
-                    "content": "Mac & cheese",
-                    "_formatted": {
-                      "id": "1",
-                      "content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
-                    }
-                  },
-                  {
-                    "id": 3,
-                    "content": "Mac&sep&&sepcheese",
-                    "_formatted": {
-                      "id": "3",
-                      "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
-                    }
-                  }
-                ]
-                "###);
-            },
-        )
-        .await;
-
-    index
-        .search(
-            json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
-            |response, code| {
-                snapshot!(code, @"200 OK");
-                snapshot!(json_string!(response["hits"]), @r###"
-                [
-                  {
-                    "id": 1,
-                    "content": "Mac & cheese",
-                    "_formatted": {
-                      "id": "1",
-                      "content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
-                    }
-                  },
-                  {
-                    "id": 3,
-                    "content": "Mac&sep&&sepcheese",
-                    "_formatted": {
-                      "id": "3",
-                      "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
-                    }
-                  }
-                ]
-                "###);
-            },
-        )
-        .await;
-
-    index
-        .search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 2,
-                "content": "G#D#G#D#G#C#D#G#C#",
-                "_formatted": {
-                  "id": "2",
-                  "content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @"[]");
-        })
-        .await;
-}
-
-#[actix_rt::test]
-async fn advanced_synergies() {
-    let documents = json!([
-        {
-            "id": 1,
-            "content": "J.R.R. Tolkien",
-        },
-        {
-            "id": 2,
-            "content": "J. R. R. Tolkien",
-        },
-        {
-            "id": 3,
-            "content": "jrr Tolkien",
-        },
-        {
-            "id": 4,
-            "content": "J.K. Rowlings",
-        },
-        {
-            "id": 5,
-            "content": "J. K. Rowlings",
-        },
-        {
-            "id": 6,
-            "content": "jk Rowlings",
-        },
-    ]);
-
-    let server = Server::new().await;
-    let index = server.index("test");
-
-    index.add_documents(documents, None).await;
-    index.wait_task(0).await;
-
-    let (_response, _code) = index
-        .update_settings(json!({
-            "dictionary": ["J.R.R.", "J. R. R."],
-            "synonyms": {
-                "J.R.R.": ["jrr", "J. R. R."],
-                "J. R. R.": ["jrr", "J.R.R."],
-                "jrr": ["J.R.R.", "J. R. R."],
-                "J.K.": ["jk", "J. K."],
-                "J. K.": ["jk", "J.K."],
-                "jk": ["J.K.", "J. K."],
-            }
-        }))
-        .await;
-    index.wait_task(1).await;
-
-    index
-        .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 1,
-                "content": "J.R.R. Tolkien",
-                "_formatted": {
-                  "id": "1",
-                  "content": "<em>J.R.R.</em> Tolkien"
-                }
-              },
-              {
-                "id": 2,
-                "content": "J. R. R. Tolkien",
-                "_formatted": {
-                  "id": "2",
-                  "content": "<em>J. R. R.</em> Tolkien"
-                }
-              },
-              {
-                "id": 3,
-                "content": "jrr Tolkien",
-                "_formatted": {
-                  "id": "3",
-                  "content": "<em>jrr</em> Tolkien"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 3,
-                "content": "jrr Tolkien",
-                "_formatted": {
-                  "id": "3",
-                  "content": "<em>jrr</em> Tolkien"
-                }
-              },
-              {
-                "id": 1,
-                "content": "J.R.R. Tolkien",
-                "_formatted": {
-                  "id": "1",
-                  "content": "<em>J.R.R.</em> Tolkien"
-                }
-              },
-              {
-                "id": 2,
-                "content": "J. R. R. Tolkien",
-                "_formatted": {
-                  "id": "2",
-                  "content": "<em>J. R. R.</em> Tolkien"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 2,
-                "content": "J. R. R. Tolkien",
-                "_formatted": {
-                  "id": "2",
-                  "content": "<em>J. R. R.</em> Tolkien"
-                }
-              },
-              {
-                "id": 1,
-                "content": "J.R.R. Tolkien",
-                "_formatted": {
-                  "id": "1",
-                  "content": "<em>J.R.R.</em> Tolkien"
-                }
-              },
-              {
-                "id": 3,
-                "content": "jrr Tolkien",
-                "_formatted": {
-                  "id": "3",
-                  "content": "<em>jrr</em> Tolkien"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    // Only update dictionary, the synonyms should be recomputed.
-    let (_response, _code) = index
-        .update_settings(json!({
-            "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
-        }))
-        .await;
-    index.wait_task(2).await;
-
-    index
-        .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 6,
-                "content": "jk Rowlings",
-                "_formatted": {
-                  "id": "6",
-                  "content": "<em>jk</em> Rowlings"
-                }
-              },
-              {
-                "id": 4,
-                "content": "J.K. Rowlings",
-                "_formatted": {
-                  "id": "4",
-                  "content": "<em>J.K.</em> Rowlings"
-                }
-              },
-              {
-                "id": 5,
-                "content": "J. K. Rowlings",
-                "_formatted": {
-                  "id": "5",
-                  "content": "<em>J. K.</em> Rowlings"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 4,
-                "content": "J.K. Rowlings",
-                "_formatted": {
-                  "id": "4",
-                  "content": "<em>J.K.</em> Rowlings"
-                }
-              },
-              {
-                "id": 5,
-                "content": "J. K. Rowlings",
-                "_formatted": {
-                  "id": "5",
-                  "content": "<em>J. K.</em> Rowlings"
-                }
-              },
-              {
-                "id": 6,
-                "content": "jk Rowlings",
-                "_formatted": {
-                  "id": "6",
-                  "content": "<em>jk</em> Rowlings"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-
-    index
-        .search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
-            snapshot!(code, @"200 OK");
-            snapshot!(json_string!(response["hits"]), @r###"
-            [
-              {
-                "id": 5,
-                "content": "J. K. Rowlings",
-                "_formatted": {
-                  "id": "5",
-                  "content": "<em>J. K.</em> Rowlings"
-                }
-              },
-              {
-                "id": 4,
-                "content": "J.K. Rowlings",
-                "_formatted": {
-                  "id": "4",
-                  "content": "<em>J.K.</em> Rowlings"
-                }
-              },
-              {
-                "id": 6,
-                "content": "jk Rowlings",
-                "_formatted": {
-                  "id": "6",
-                  "content": "<em>jk</em> Rowlings"
-                }
-              },
-              {
-                "id": 2,
-                "content": "J. R. R. Tolkien",
-                "_formatted": {
-                  "id": "2",
-                  "content": "<em>J. R.</em> R. Tolkien"
-                }
-              }
-            ]
-            "###);
-        })
-        .await;
-}
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -17,10 +17,10 @@ bincode = "1.3.3"
 bstr = "1.4.0"
 bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
 byteorder = "1.4.3"
-charabia = { version = "0.8.3", default-features = false }
+charabia = { version = "0.8.2", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.8"
-deserr = { version = "0.6.0", features = ["actix-web"]}
+deserr = "0.5.0"
 either = { version = "1.8.1", features = ["serde"] }
 flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
@@ -32,18 +32,18 @@ grenad = { version = "0.4.4", default-features = false, features = [
 heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
    "lmdb", "read-txn-no-tls"
 ] }
-indexmap = { version = "2.0.0", features = ["serde"] }
+indexmap = { version = "1.9.3", features = ["serde"] }
 instant-distance = { version = "0.6.1", features = ["with-serde"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
-memmap2 = "0.7.1"
+memmap2 = "0.5.10"
 obkv = "0.2.0"
 once_cell = "1.17.1"
 ordered-float = "3.6.0"
 rand_pcg = { version = "0.3.1", features = ["serde1"] }
 rayon = "1.7.0"
 roaring = "0.10.1"
-rstar = { version = "0.11.0", features = ["serde"] }
+rstar = { version = "0.10.0", features = ["serde"] }
 serde = { version = "1.0.160", features = ["derive"] }
 serde_json = { version = "1.0.95", features = ["preserve_order"] }
 slice-group-by = "0.3.0"
@@ -63,7 +63,7 @@ uuid = { version = "1.3.1", features = ["v4"] }
 filter-parser = { path = "../filter-parser" }

 # documents words self-join
-itertools = "0.11.0"
+itertools = "0.10.5"

 # profiling
 puffin = "0.16.0"
--- a/milli/src/documents/enriched.rs
+++ b/milli/src/documents/enriched.rs
@@ -1,5 +1,4 @@
 use std::fs::File;
-use std::io::BufReader;
 use std::{io, str};

 use obkv::KvReader;
@@ -20,14 +19,14 @@ use crate::FieldId;
 pub struct EnrichedDocumentsBatchReader<R> {
    documents: DocumentsBatchReader<R>,
    primary_key: String,
-    external_ids: grenad::ReaderCursor<BufReader<File>>,
+    external_ids: grenad::ReaderCursor<File>,
 }

 impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
    pub fn new(
        documents: DocumentsBatchReader<R>,
        primary_key: String,
-        external_ids: grenad::Reader<BufReader<File>>,
+        external_ids: grenad::Reader<File>,
    ) -> Result<Self, Error> {
        if documents.documents_count() as u64 == external_ids.len() {
            Ok(EnrichedDocumentsBatchReader {
@@ -76,7 +75,7 @@ pub struct EnrichedDocument<'a> {
 pub struct EnrichedDocumentsBatchCursor<R> {
    documents: DocumentsBatchCursor<R>,
    primary_key: String,
-    external_ids: grenad::ReaderCursor<BufReader<File>>,
+    external_ids: grenad::ReaderCursor<File>,
 }

 impl<R> EnrichedDocumentsBatchCursor<R> {
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -122,28 +122,22 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured sortable attributes.".to_string(),
-            false => format!("Available sortable attributes are: `{}{}`.",
-                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
-                    .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
+            false => format!("Available sortable attributes are: `{}`.",
+                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
                ),
        }
    )]
-    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
+    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
    #[error("Attribute `{}` is not facet-searchable. {}",
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
-            false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
-                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
-                    .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
+            false => format!("Available facet-searchable attributes are: `{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
+                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
                ),
        }
    )]
-    InvalidFacetSearchFacetName {
-        field: String,
-        valid_fields: BTreeSet<String>,
-        hidden_fields: bool,
-    },
+    InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet<String> },
    #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
        .field,
        .valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
@@ -346,11 +340,8 @@ fn conditionally_lookup_for_error_message() {
    ];

    for (list, suffix) in messages {
-        let err = UserError::InvalidSortableAttribute {
-            field: "name".to_string(),
-            valid_fields: list,
-            hidden_fields: false,
-        };
+        let err =
+            UserError::InvalidSortableAttribute { field: "name".to_string(), valid_fields: list };

        assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
    }
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1,5 +1,5 @@
 use std::borrow::Cow;
-use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BTreeSet, HashMap, HashSet};
 use std::fs::File;
 use std::mem::size_of;
 use std::path::Path;
@@ -61,12 +61,8 @@ pub mod main_key {
    pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
    pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
    pub const STOP_WORDS_KEY: &str = "stop-words";
-    pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
-    pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
-    pub const DICTIONARY_KEY: &str = "dictionary";
    pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
    pub const SYNONYMS_KEY: &str = "synonyms";
-    pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
    pub const WORDS_FST_KEY: &str = "words-fst";
    pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
    pub const CREATED_AT_KEY: &str = "created-at";
@@ -655,26 +651,6 @@ impl Index {
        }
    }

-    /* remove hidden fields */
-    pub fn remove_hidden_fields(
-        &self,
-        rtxn: &RoTxn,
-        fields: impl IntoIterator<Item = impl AsRef<str>>,
-    ) -> Result<(BTreeSet<String>, bool)> {
-        let mut valid_fields =
-            fields.into_iter().map(|f| f.as_ref().to_string()).collect::<BTreeSet<String>>();
-
-        let fields_len = valid_fields.len();
-
-        if let Some(dn) = self.displayed_fields(rtxn)? {
-            let displayable_names = dn.iter().map(|s| s.to_string()).collect();
-            valid_fields = &valid_fields & &displayable_names;
-        }
-
-        let hidden_fields = fields_len > valid_fields.len();
-        Ok((valid_fields, hidden_fields))
-    }
-
    /* searchable fields */

    /// Write the user defined searchable fields and generate the real searchable fields from the specified fields ids map.
@@ -1079,116 +1055,18 @@ impl Index {
        }
    }

-    /* non separator tokens */
-
-    pub(crate) fn put_non_separator_tokens(
-        &self,
-        wtxn: &mut RwTxn,
-        set: &BTreeSet<String>,
-    ) -> heed::Result<()> {
-        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
-    }
-
-    pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
-        self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
-    }
-
-    pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
-        Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
-            rtxn,
-            main_key::NON_SEPARATOR_TOKENS_KEY,
-        )?)
-    }
-
-    /* separator tokens */
-
-    pub(crate) fn put_separator_tokens(
-        &self,
-        wtxn: &mut RwTxn,
-        set: &BTreeSet<String>,
-    ) -> heed::Result<()> {
-        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
-    }
-
-    pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
-        self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
-    }
-
-    pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
-        Ok(self
-            .main
-            .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
-    }
-
-    /* separators easing method */
-
-    pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
-        let default_separators =
-            charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
-        let mut separators: Option<BTreeSet<_>> = None;
-        if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
-            separator_tokens.extend(default_separators.clone());
-            separators = Some(separator_tokens);
-        }
-
-        if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
-            separators = separators
-                .or_else(|| Some(default_separators.collect()))
-                .map(|separators| &separators - &non_separator_tokens);
-        }
-
-        Ok(separators)
-    }
-
-    /* dictionary */
-
-    pub(crate) fn put_dictionary(
-        &self,
-        wtxn: &mut RwTxn,
-        set: &BTreeSet<String>,
-    ) -> heed::Result<()> {
-        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
-    }
-
-    pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
-        self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
-    }
-
-    pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
-        Ok(self
-            .main
-            .get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
-    }
-
    /* synonyms */

    pub(crate) fn put_synonyms(
        &self,
        wtxn: &mut RwTxn,
        synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
-        user_defined_synonyms: &BTreeMap<String, Vec<String>>,
    ) -> heed::Result<()> {
-        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
-        self.main.put::<_, Str, SerdeBincode<_>>(
-            wtxn,
-            main_key::USER_DEFINED_SYNONYMS_KEY,
-            user_defined_synonyms,
-        )
+        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
    }

    pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
-        self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
-        self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
-    }
-
-    pub fn user_defined_synonyms(
-        &self,
-        rtxn: &RoTxn,
-    ) -> heed::Result<BTreeMap<String, Vec<String>>> {
-        Ok(self
-            .main
-            .get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
-            .unwrap_or_default())
+        self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
    }

    pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
@@ -1840,11 +1718,11 @@ pub(crate) mod tests {
            .unwrap();
        index
            .add_documents(documents!([
-                { "id": 0, "_geo": { "lat": "0", "lng": "0" } },
-                { "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
-                { "id": 2, "_geo": { "lat": "0", "lng": 175 } },
+                { "id": 0, "_geo": { "lat": 0, "lng": 0 } },
+                { "id": 1, "_geo": { "lat": 0, "lng": -175 } },
+                { "id": 2, "_geo": { "lat": 0, "lng": 175 } },
                { "id": 3, "_geo": { "lat": 85, "lng": 0 } },
-                { "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
+                { "id": 4, "_geo": { "lat": -85, "lng": 0 } },
            ]))
            .unwrap();

--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
 ///
 /// This number is determined by the keys of the different facet databases
 /// and adding a margin of safety.
-pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
+pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20;

 /// The maximum length a word can be
 pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
@@ -293,15 +293,15 @@ pub fn normalize_facet(original: &str) -> String {
 #[derive(serde::Serialize, serde::Deserialize, Debug)]
 #[serde(transparent)]
 pub struct VectorOrArrayOfVectors {
-    #[serde(with = "either::serde_untagged_optional")]
-    inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
+    #[serde(with = "either::serde_untagged")]
+    inner: either::Either<Vec<f32>, Vec<Vec<f32>>>,
 }

 impl VectorOrArrayOfVectors {
-    pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
-        match self.inner? {
-            either::Either::Left(vector) => Some(vec![vector]),
-            either::Either::Right(vectors) => Some(vectors),
+    pub fn into_array_of_vectors(self) -> Vec<Vec<f32>> {
+        match self.inner {
+            either::Either::Left(vector) => vec![vector],
+            either::Either::Right(vectors) => vectors,
        }
    }
 }
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -280,13 +280,9 @@ impl<'a> SearchForFacetValues<'a> {

        let filterable_fields = index.filterable_fields(rtxn)?;
        if !filterable_fields.contains(&self.facet) {
-            let (valid_fields, hidden_fields) =
-                index.remove_hidden_fields(rtxn, filterable_fields)?;
-
            return Err(UserError::InvalidFacetSearchFacetName {
                field: self.facet.clone(),
-                valid_fields,
-                hidden_fields,
+                valid_fields: filterable_fields.into_iter().collect(),
            }
            .into());
        }
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -46,27 +46,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        if let Some(distinct_fid) = distinct_fid {
            let mut excluded = RoaringBitmap::new();
            let mut results = vec![];
-            let mut skip = 0;
            for docid in universe.iter() {
-                if results.len() >= length {
+                if results.len() >= from + length {
                    break;
                }
                if excluded.contains(docid) {
                    continue;
                }
-
                distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
-                skip += 1;
-                if skip <= from {
-                    continue;
-                }
-
                results.push(docid);
            }
-
            let mut all_candidates = universe - excluded;
            all_candidates.extend(results.iter().copied());
-
            return Ok(BucketSortOutput {
                scores: vec![Default::default(); results.len()],
                docids: results,
@@ -100,12 +91,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    /// Update the universes accordingly and inform the logger.
    macro_rules! back {
        () => {
-            // FIXME: temporarily disabled assert: see <https://github.com/meilisearch/meilisearch/pull/4013>
-            // assert!(
-            //     ranking_rule_universes[cur_ranking_rule_index].is_empty(),
-            //     "The ranking rule {} did not sort its bucket exhaustively",
-            //     ranking_rules[cur_ranking_rule_index].id()
-            // );
+            assert!(
+                ranking_rule_universes[cur_ranking_rule_index].is_empty(),
+                "The ranking rule {} did not sort its bucket exhaustively",
+                ranking_rules[cur_ranking_rule_index].id()
+            );
            logger.end_iteration_ranking_rule(
                cur_ranking_rule_index,
                ranking_rules[cur_ranking_rule_index].as_ref(),
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -418,11 +418,19 @@ impl<'t> Matcher<'t, '_> {
        } else {
            match &self.matches {
                Some((tokens, matches)) => {
+                    // If the text has to be cropped,
+                    // compute the best interval to crop around.
+                    let matches = match format_options.crop {
+                        Some(crop_size) if crop_size > 0 => {
+                            self.find_best_match_interval(matches, crop_size)
+                        }
+                        _ => matches,
+                    };
+
                    // If the text has to be cropped,
                    // crop around the best interval.
                    let (byte_start, byte_end) = match format_options.crop {
                        Some(crop_size) if crop_size > 0 => {
-                            let matches = self.find_best_match_interval(matches, crop_size);
                            self.crop_bounds(tokens, matches, crop_size)
                        }
                        _ => (0, self.text.len()),
@@ -442,11 +450,6 @@ impl<'t> Matcher<'t, '_> {
                        for m in matches {
                            let token = &tokens[m.token_position];

-                            // skip matches out of the crop window.
-                            if token.byte_start < byte_start || token.byte_end > byte_end {
-                                continue;
-                            }
-
                            if byte_index < token.byte_start {
                                formatted.push(&self.text[byte_index..token.byte_start]);
                            }
@@ -797,37 +800,6 @@ mod tests {
        );
    }

-    #[test]
-    fn format_highlight_crop_phrase_query() {
-        //! testing: https://github.com/meilisearch/meilisearch/issues/3975
-        let temp_index = TempIndex::new();
-        temp_index
-            .add_documents(documents!([
-                { "id": 1, "text": "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!" }
-            ]))
-            .unwrap();
-        let rtxn = temp_index.read_txn().unwrap();
-
-        let format_options = FormatOptions { highlight: true, crop: Some(10) };
-        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
-
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
-        let mut matcher = builder.build(text);
-        // should return 10 words with a marker at the start as well the end, and the highlighted matches.
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…had the power to split <em>the</em> <em>world</em> between those who…"
-        );
-
-        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
-        let mut matcher = builder.build(text);
-        // should highlight "those" and the phrase "and those".
-        insta::assert_snapshot!(
-            matcher.format(format_options),
-            @"…world between <em>those</em> who embraced progress <em>and</em> <em>those</em> who resisted…"
-        );
-    }
-
    #[test]
    fn smaller_crop_size() {
        //! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -20,7 +20,7 @@ mod sort;
 #[cfg(test)]
 mod tests;

-use std::collections::HashSet;
+use std::collections::{BTreeSet, HashSet};

 use bucket_sort::{bucket_sort, BucketSortOutput};
 use charabia::TokenizerBuilder;
@@ -108,11 +108,24 @@ impl<'ctx> SearchContext<'ctx> {
                (None, None) => continue,
                // The field is not searchable => User error
                (_fid, Some(false)) => {
-                    let (valid_fields, hidden_fields) = match searchable_names {
-                        Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
-                        None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
-                    };
+                    let mut valid_fields: BTreeSet<_> =
+                        fids_map.names().map(String::from).collect();

+                    // Filter by the searchable names
+                    if let Some(sn) = searchable_names {
+                        let searchable_names = sn.iter().map(|s| s.to_string()).collect();
+                        valid_fields = &valid_fields & &searchable_names;
+                    }
+
+                    let searchable_count = valid_fields.len();
+
+                    // Remove hidden fields
+                    if let Some(dn) = self.index.displayed_fields(self.txn)? {
+                        let displayable_names = dn.iter().map(|s| s.to_string()).collect();
+                        valid_fields = &valid_fields & &displayable_names;
+                    }
+
+                    let hidden_fields = searchable_count > valid_fields.len();
                    let field = field_name.to_string();
                    return Err(UserError::InvalidSearchableAttribute {
                        field,
@@ -475,20 +488,6 @@ pub fn execute_search(
            tokbuilder.stop_words(stop_words);
        }

-        let separators = ctx.index.allowed_separators(ctx.txn)?;
-        let separators: Option<Vec<_>> =
-            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
-        if let Some(ref separators) = separators {
-            tokbuilder.separators(separators);
-        }
-
-        let dictionary = ctx.index.dictionary(ctx.txn)?;
-        let dictionary: Option<Vec<_>> =
-            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
-        if let Some(ref dictionary) = dictionary {
-            tokbuilder.words_dict(dictionary);
-        }
-
        let script_lang_map = ctx.index.script_language(ctx.txn)?;
        if !script_lang_map.is_empty() {
            tokbuilder.allow_list(&script_lang_map);
@@ -591,24 +590,16 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
    for asc_desc in sort_criteria {
        match asc_desc.member() {
            Member::Field(ref field) if !crate::is_faceted(field, &sortable_fields) => {
-                let (valid_fields, hidden_fields) =
-                    ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
-
                return Err(UserError::InvalidSortableAttribute {
                    field: field.to_string(),
-                    valid_fields,
-                    hidden_fields,
-                })?;
+                    valid_fields: sortable_fields.into_iter().collect(),
+                })?
            }
            Member::Geo(_) if !sortable_fields.contains("_geo") => {
-                let (valid_fields, hidden_fields) =
-                    ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
-
                return Err(UserError::InvalidSortableAttribute {
                    field: "_geo".to_string(),
-                    valid_fields,
-                    hidden_fields,
-                })?;
+                    valid_fields: sortable_fields.into_iter().collect(),
+                })?
            }
            _ => (),
        }
--- a/milli/src/search/new/tests/integration.rs
+++ b/milli/src/search/new/tests/integration.rs
@@ -2,7 +2,7 @@ use std::io::Cursor;

 use big_s::S;
 use heed::EnvOpenOptions;
-use maplit::{btreemap, hashset};
+use maplit::{hashmap, hashset};

 use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
@@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
        S("tag"),
        S("asc_desc_rank"),
    });
-    builder.set_synonyms(btreemap! {
+    builder.set_synonyms(hashmap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
--- a/milli/src/search/new/tests/proximity.rs
+++ b/milli/src/search/new/tests/proximity.rs
@@ -15,7 +15,7 @@ they store fewer sprximities than the regular word sprximity DB.

 */

-use std::collections::BTreeMap;
+use std::collections::HashMap;

 use crate::index::tests::TempIndex;
 use crate::search::new::tests::collect_field_values;
@@ -336,7 +336,7 @@ fn test_proximity_split_word() {

    index
        .update_settings(|s| {
-            let mut syns = BTreeMap::new();
+            let mut syns = HashMap::new();
            syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
            s.set_synonyms(syns);
        })
--- a/milli/src/search/new/tests/typo.rs
+++ b/milli/src/search/new/tests/typo.rs
@@ -18,7 +18,7 @@ if `words` doesn't exist before it.
 14. Synonyms cost nothing according to the typo ranking rule
 */

-use std::collections::BTreeMap;
+use std::collections::HashMap;

 use crate::index::tests::TempIndex;
 use crate::search::new::tests::collect_field_values;
@@ -591,7 +591,7 @@ fn test_typo_synonyms() {
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);

-            let mut synonyms = BTreeMap::new();
+            let mut synonyms = HashMap::new();
            synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
            synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);

--- a/milli/src/update/facet/bulk.rs
+++ b/milli/src/update/facet/bulk.rs
@@ -1,6 +1,5 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;
@@ -31,7 +30,7 @@ pub struct FacetsUpdateBulk<'i> {
    facet_type: FacetType,
    field_ids: Vec<FieldId>,
    // None if level 0 does not need to be updated
-    new_data: Option<grenad::Reader<BufReader<File>>>,
+    new_data: Option<grenad::Reader<File>>,
 }

 impl<'i> FacetsUpdateBulk<'i> {
@@ -39,7 +38,7 @@ impl<'i> FacetsUpdateBulk<'i> {
        index: &'i Index,
        field_ids: Vec<FieldId>,
        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        new_data: grenad::Reader<File>,
        group_size: u8,
        min_level_size: u8,
    ) -> FacetsUpdateBulk<'i> {
@@ -188,7 +187,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        &self,
        field_id: FieldId,
        txn: &RoTxn,
-    ) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
+    ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
        let mut all_docids = RoaringBitmap::new();
        let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
            for bitmap in bitmaps {
@@ -260,7 +259,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        field_id: u16,
        level: u8,
        handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
-    ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
+    ) -> Result<Vec<grenad::Reader<File>>> {
        if level == 0 {
            self.read_level_0(rtxn, field_id, handle_group)?;
            // Level 0 is already in the database
--- a/milli/src/update/facet/incremental.rs
+++ b/milli/src/update/facet/incremental.rs
@@ -1,6 +1,5 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io::BufReader;

 use heed::types::{ByteSlice, DecodeIgnore};
 use heed::{BytesDecode, Error, RoTxn, RwTxn};
@@ -35,14 +34,14 @@ pub struct FacetsUpdateIncremental<'i> {
    index: &'i Index,
    inner: FacetsUpdateIncrementalInner,
    facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    new_data: grenad::Reader<File>,
 }

 impl<'i> FacetsUpdateIncremental<'i> {
    pub fn new(
        index: &'i Index,
        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        new_data: grenad::Reader<File>,
        group_size: u8,
        min_level_size: u8,
        max_group_size: u8,
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@@ -78,7 +78,6 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;

 use std::collections::BTreeSet;
 use std::fs::File;
-use std::io::BufReader;
 use std::iter::FromIterator;

 use charabia::normalizer::{Normalize, NormalizerOption};
@@ -95,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
 use crate::heed_codec::ByteSliceRefCodec;
 use crate::update::index_documents::create_sorter;
 use crate::update::merge_btreeset_string;
-use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
+use crate::{BEU16StrCodec, Index, Result, BEU16};

 pub mod bulk;
 pub mod delete;
@@ -109,17 +108,13 @@ pub struct FacetsUpdate<'i> {
    index: &'i Index,
    database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
    facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    new_data: grenad::Reader<File>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
 }
 impl<'i> FacetsUpdate<'i> {
-    pub fn new(
-        index: &'i Index,
-        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
-    ) -> Self {
+    pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
        let database = match facet_type {
            FacetType::String => index
                .facet_id_string_docids
@@ -196,16 +191,7 @@ impl<'i> FacetsUpdate<'i> {
        for result in database.iter(wtxn)? {
            let (facet_group_key, ()) = result?;
            if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
-                let mut normalized_facet = left_bound.normalize(&options);
-                let normalized_truncated_facet: String;
-                if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
-                    normalized_truncated_facet = normalized_facet
-                        .char_indices()
-                        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
-                        .map(|(_, c)| c)
-                        .collect();
-                    normalized_facet = normalized_truncated_facet.into();
-                }
+                let normalized_facet = left_bound.normalize(&options);
                let set = BTreeSet::from_iter(std::iter::once(left_bound));
                let key = (field_id, normalized_facet.as_ref());
                let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@@ -1,4 +1,4 @@
-use std::io::{BufWriter, Read, Seek};
+use std::io::{Read, Seek};
 use std::result::Result as StdResult;
 use std::{fmt, iter};

@@ -35,7 +35,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(

    let (mut cursor, mut documents_batch_index) = reader.into_cursor_and_fields_index();

-    let mut external_ids = tempfile::tempfile().map(BufWriter::new).map(grenad::Writer::new)?;
+    let mut external_ids = tempfile::tempfile().map(grenad::Writer::new)?;
    let mut uuid_buffer = [0; uuid::fmt::Hyphenated::LENGTH];

    // The primary key *field id* that has already been set for this index or the one
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -1,7 +1,6 @@
 use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::BufReader;
 use std::{io, mem, str};

 use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
@@ -29,10 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    searchable_fields: &Option<HashSet<FieldId>>,
    stop_words: Option<&fst::Set<&[u8]>>,
-    allowed_separators: Option<&[&str]>,
-    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
-) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
+) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
    puffin::profile_function!();

    let max_positions_per_attributes = max_positions_per_attributes
@@ -55,12 +52,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    if let Some(stop_words) = stop_words {
        tokenizer_builder.stop_words(stop_words);
    }
-    if let Some(dictionary) = dictionary {
-        tokenizer_builder.words_dict(dictionary);
-    }
-    if let Some(separators) = allowed_separators {
-        tokenizer_builder.separators(separators);
-    }
    let tokenizer = tokenizer_builder.build();

    let mut cursor = obkv_documents.into_cursor()?;
@@ -227,9 +218,9 @@ fn process_tokens<'a>(
 ) -> impl Iterator<Item = (usize, Token<'a>)> {
    tokens
        .skip_while(|token| token.is_separator())
-        .scan((0, None), |(offset, prev_kind), mut token| {
+        .scan((0, None), |(offset, prev_kind), token| {
            match token.kind {
-                TokenKind::Word | TokenKind::StopWord if !token.lemma().is_empty() => {
+                TokenKind::Word | TokenKind::StopWord | TokenKind::Unknown => {
                    *offset += match *prev_kind {
                        Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
                        Some(_) => 1,
@@ -245,7 +236,7 @@ fn process_tokens<'a>(
                {
                    *prev_kind = Some(token.kind);
                }
-                _ => token.kind = TokenKind::Unknown,
+                _ => (),
            }
            Some((*offset, token))
        })
--- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use heed::{BytesDecode, BytesEncode};

@@ -19,7 +19,7 @@ use crate::Result;
 pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
    docid_fid_facet_number: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use heed::BytesEncode;

@@ -17,7 +17,7 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
    docid_fid_facet_string: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
@@ -46,7 +46,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
        if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
            normalised_truncated_value = normalised_value
                .char_indices()
-                .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+                .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
                .map(|(_, c)| c)
                .collect();
            normalised_value = normalised_truncated_value.as_str();
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -1,7 +1,7 @@
 use std::collections::{BTreeMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
 use std::mem::size_of;

 use heed::zerocopy::AsBytes;
@@ -17,24 +17,22 @@ use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET

 /// The extracted facet values stored in grenad files by type.
 pub struct ExtractedFacetValues {
-    pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
-    pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
-    pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
+    pub docid_fid_facet_numbers_chunk: grenad::Reader<File>,
+    pub docid_fid_facet_strings_chunk: grenad::Reader<File>,
+    pub fid_facet_is_null_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_is_empty_docids_chunk: grenad::Reader<File>,
+    pub fid_facet_exists_docids_chunk: grenad::Reader<File>,
 }

 /// Extracts the facet values of each faceted field of each document.
 ///
 /// Returns the generated grenad reader containing the docid the fid and the orginal value as key
 /// and the normalized value as value extracted from the given chunk of documents.
-/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
 #[logging_timer::time]
 pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
    faceted_fields: &HashSet<FieldId>,
-    geo_fields_ids: Option<(FieldId, FieldId)>,
 ) -> Result<ExtractedFacetValues> {
    puffin::profile_function!();

@@ -86,10 +84,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(

                let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;

-                match extract_facet_values(
-                    &value,
-                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
-                ) {
+                match extract_facet_values(&value) {
                    FilterableValues::Null => {
                        facet_is_null_docids.entry(field_id).or_default().insert(document);
                    }
@@ -182,13 +177,12 @@ enum FilterableValues {
    Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 }

-fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
+fn extract_facet_values(value: &Value) -> FilterableValues {
    fn inner_extract_facet_values(
        value: &Value,
        can_recurse: bool,
        output_numbers: &mut Vec<f64>,
        output_strings: &mut Vec<(String, String)>,
-        geo_field: bool,
    ) {
        match value {
            Value::Null => (),
@@ -199,30 +193,13 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
                }
            }
            Value::String(original) => {
-                // if we're working on a geofield it MUST be something we can parse or else there was an internal error
-                // in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
-                if geo_field {
-                    if let Ok(float) = original.parse() {
-                        output_numbers.push(float);
-                    } else {
-                        log::warn!(
-                            "Internal error, could not parse a geofield that has been validated. Please open an issue."
-                        )
-                    }
-                }
                let normalized = crate::normalize_facet(original);
                output_strings.push((normalized, original.clone()));
            }
            Value::Array(values) => {
                if can_recurse {
                    for value in values {
-                        inner_extract_facet_values(
-                            value,
-                            false,
-                            output_numbers,
-                            output_strings,
-                            geo_field,
-                        );
+                        inner_extract_facet_values(value, false, output_numbers, output_strings);
                    }
                }
            }
@@ -238,7 +215,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
        otherwise => {
            let mut numbers = Vec::new();
            let mut strings = Vec::new();
-            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
+            inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
            FilterableValues::Values { numbers, strings }
        }
    }
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@@ -1,6 +1,6 @@
 use std::collections::HashMap;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use grenad::Sorter;

@@ -21,7 +21,7 @@ use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
 pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use concat_arrays::concat_arrays;
 use serde_json::Value;
@@ -18,7 +18,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    primary_key_id: FieldId,
    (lat_fid, lng_fid): (FieldId, FieldId),
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let mut writer = create_writer(
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -1,6 +1,6 @@
 use std::convert::TryFrom;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use bytemuck::cast_slice;
 use serde_json::{from_slice, Value};
@@ -18,7 +18,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    primary_key_id: FieldId,
    vectors_fid: FieldId,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let mut writer = create_writer(
@@ -35,7 +35,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
        // lazily get it when needed
        let document_id = || -> Value {
            let document_id = obkv.get(primary_key_id).unwrap();
-            from_slice(document_id).unwrap()
+            serde_json::from_slice(document_id).unwrap()
        };

        // first we retrieve the _vectors field
@@ -52,14 +52,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                }
            };

-            if let Some(vectors) = vectors {
-                for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
-                    let index = u16::try_from(i).unwrap();
-                    let mut key = docid_bytes.to_vec();
-                    key.extend_from_slice(&index.to_be_bytes());
-                    let bytes = cast_slice(&vector);
-                    writer.insert(key, bytes)?;
-                }
+            for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
+                let index = u16::try_from(i).unwrap();
+                let mut key = docid_bytes.to_vec();
+                key.extend_from_slice(&index.to_be_bytes());
+                let bytes = cast_slice(&vector);
+                writer.insert(key, bytes)?;
            }
        }
        // else => the `_vectors` object was `null`, there is nothing to do
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@@ -1,6 +1,6 @@
 use std::collections::HashSet;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;
 use std::iter::FromIterator;

 use roaring::RoaringBitmap;
@@ -26,7 +26,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
    exact_attributes: &HashSet<FieldId>,
-) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
+) -> Result<(grenad::Reader<File>, grenad::Reader<File>)> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_word_fid_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_fid_docids.rs
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use super::helpers::{
    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@@ -14,7 +14,7 @@ use crate::{relative_from_absolute_position, DocumentId, Result};
 pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@@ -1,7 +1,6 @@
 use std::cmp::Ordering;
 use std::collections::{BinaryHeap, HashMap};
 use std::fs::File;
-use std::io::BufReader;
 use std::{cmp, io, mem, str, vec};

 use super::helpers::{
@@ -21,7 +20,7 @@ use crate::{DocumentId, Result};
 pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
@@ -1,5 +1,5 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use super::helpers::{
    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
@@ -17,7 +17,7 @@ use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Resu
 pub fn extract_word_position_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -12,7 +12,6 @@ mod extract_word_position_docids;

 use std::collections::HashSet;
 use std::fs::File;
-use std::io::BufReader;

 use crossbeam_channel::Sender;
 use log::debug;
@@ -40,8 +39,8 @@ use crate::{FieldId, Result};
 /// Send data in grenad file over provided Sender.
 #[allow(clippy::too_many_arguments)]
 pub(crate) fn data_from_obkv_documents(
-    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
-    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>> + Send,
+    original_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
+    flattened_obkv_chunks: impl Iterator<Item = Result<grenad::Reader<File>>> + Send,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    searchable_fields: Option<HashSet<FieldId>>,
@@ -50,8 +49,6 @@ pub(crate) fn data_from_obkv_documents(
    geo_fields_ids: Option<(FieldId, FieldId)>,
    vectors_field_id: Option<FieldId>,
    stop_words: Option<fst::Set<&[u8]>>,
-    allowed_separators: Option<&[&str]>,
-    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
    exact_attributes: HashSet<FieldId>,
 ) -> Result<()> {
@@ -60,13 +57,7 @@ pub(crate) fn data_from_obkv_documents(
    original_obkv_chunks
        .par_bridge()
        .map(|original_documents_chunk| {
-            send_original_documents_data(
-                original_documents_chunk,
-                indexer,
-                lmdb_writer_sx.clone(),
-                vectors_field_id,
-                primary_key_id,
-            )
+            send_original_documents_data(original_documents_chunk, lmdb_writer_sx.clone())
        })
        .collect::<Result<()>>()?;

@@ -83,9 +74,8 @@ pub(crate) fn data_from_obkv_documents(
                    &faceted_fields,
                    primary_key_id,
                    geo_fields_ids,
+                    vectors_field_id,
                    &stop_words,
-                    &allowed_separators,
-                    &dictionary,
                    max_positions_per_attributes,
                )
            })
@@ -153,7 +143,7 @@ pub(crate) fn data_from_obkv_documents(
        });
    }

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@@ -163,7 +153,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-pair-proximity-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@@ -173,11 +163,7 @@ pub(crate) fn data_from_obkv_documents(
        "field-id-wordcount-docids",
    );

-    spawn_extraction_task::<
-        _,
-        _,
-        Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
-    >(
+    spawn_extraction_task::<_, _, Vec<(grenad::Reader<File>, grenad::Reader<File>)>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@@ -190,7 +176,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
@@ -199,7 +185,7 @@ pub(crate) fn data_from_obkv_documents(
        TypedChunk::WordPositionDocids,
        "word-position-docids",
    );
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_word_positions_chunks,
        indexer,
        lmdb_writer_sx.clone(),
@@ -209,7 +195,7 @@ pub(crate) fn data_from_obkv_documents(
        "word-fid-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_fid_facet_strings_chunks,
        indexer,
        lmdb_writer_sx.clone(),
@@ -219,7 +205,7 @@ pub(crate) fn data_from_obkv_documents(
        "field-id-facet-string-docids",
    );

-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
+    spawn_extraction_task::<_, _, Vec<grenad::Reader<File>>>(
        docid_fid_facet_numbers_chunks,
        indexer,
        lmdb_writer_sx,
@@ -274,34 +260,12 @@ fn spawn_extraction_task<FE, FS, M>(
 /// Extract chunked data and send it into lmdb_writer_sx sender:
 /// - documents
 fn send_original_documents_data(
-    original_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
-    indexer: GrenadParameters,
+    original_documents_chunk: Result<grenad::Reader<File>>,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
-    vectors_field_id: Option<FieldId>,
-    primary_key_id: FieldId,
 ) -> Result<()> {
    let original_documents_chunk =
        original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;

-    if let Some(vectors_field_id) = vectors_field_id {
-        let documents_chunk_cloned = original_documents_chunk.clone();
-        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
-        rayon::spawn(move || {
-            let result = extract_vector_points(
-                documents_chunk_cloned,
-                indexer,
-                primary_key_id,
-                vectors_field_id,
-            );
-            let _ = match result {
-                Ok(vector_points) => {
-                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
-                }
-                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
-            };
-        });
-    }
-
    // TODO: create a custom internal error
    lmdb_writer_sx.send(Ok(TypedChunk::Documents(original_documents_chunk))).unwrap();
    Ok(())
@@ -316,16 +280,15 @@ fn send_original_documents_data(
 #[allow(clippy::too_many_arguments)]
 #[allow(clippy::type_complexity)]
 fn send_and_extract_flattened_documents_data(
-    flattened_documents_chunk: Result<grenad::Reader<BufReader<File>>>,
+    flattened_documents_chunk: Result<grenad::Reader<File>>,
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    searchable_fields: &Option<HashSet<FieldId>>,
    faceted_fields: &HashSet<FieldId>,
    primary_key_id: FieldId,
    geo_fields_ids: Option<(FieldId, FieldId)>,
+    vectors_field_id: Option<FieldId>,
    stop_words: &Option<fst::Set<&[u8]>>,
-    allowed_separators: &Option<&[&str]>,
-    dictionary: &Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<(
    grenad::Reader<CursorClonableMmap>,
@@ -333,10 +296,7 @@ fn send_and_extract_flattened_documents_data(
        grenad::Reader<CursorClonableMmap>,
        (
            grenad::Reader<CursorClonableMmap>,
-            (
-                grenad::Reader<BufReader<File>>,
-                (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>),
-            ),
+            (grenad::Reader<File>, (grenad::Reader<File>, grenad::Reader<File>)),
        ),
    ),
 )> {
@@ -356,6 +316,25 @@ fn send_and_extract_flattened_documents_data(
        });
    }

+    if let Some(vectors_field_id) = vectors_field_id {
+        let documents_chunk_cloned = flattened_documents_chunk.clone();
+        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
+        rayon::spawn(move || {
+            let result = extract_vector_points(
+                documents_chunk_cloned,
+                indexer,
+                primary_key_id,
+                vectors_field_id,
+            );
+            let _ = match result {
+                Ok(vector_points) => {
+                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
+                }
+                Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
+            };
+        });
+    }
+
    let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
        rayon::join(
            || {
@@ -365,8 +344,6 @@ fn send_and_extract_flattened_documents_data(
                        indexer,
                        searchable_fields,
                        stop_words.as_ref(),
-                        *allowed_separators,
-                        *dictionary,
                        max_positions_per_attributes,
                    )?;

@@ -393,7 +370,6 @@ fn send_and_extract_flattened_documents_data(
                    flattened_documents_chunk.clone(),
                    indexer,
                    faceted_fields,
-                    geo_fields_ids,
                )?;

                // send docid_fid_facet_numbers_chunk to DB writer
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::fs::File;
-use std::io::{self, BufReader, BufWriter, Seek};
+use std::io::{self, Seek};
 use std::time::Instant;

 use grenad::{CompressionType, Sorter};
@@ -17,13 +17,13 @@ pub fn create_writer<R: io::Write>(
    typ: grenad::CompressionType,
    level: Option<u32>,
    file: R,
-) -> grenad::Writer<BufWriter<R>> {
+) -> grenad::Writer<R> {
    let mut builder = grenad::Writer::builder();
    builder.compression_type(typ);
    if let Some(level) = level {
        builder.compression_level(level);
    }
-    builder.build(BufWriter::new(file))
+    builder.build(file)
 }

 pub fn create_sorter(
@@ -53,7 +53,7 @@ pub fn create_sorter(
 pub fn sorter_into_reader(
    sorter: grenad::Sorter<MergeFn>,
    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
+) -> Result<grenad::Reader<File>> {
    let mut writer = create_writer(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
@@ -64,18 +64,16 @@ pub fn sorter_into_reader(
    writer_into_reader(writer)
 }

-pub fn writer_into_reader(
-    writer: grenad::Writer<BufWriter<File>>,
-) -> Result<grenad::Reader<BufReader<File>>> {
-    let mut file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
+pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader<File>> {
+    let mut file = writer.into_inner()?;
    file.rewind()?;
-    grenad::Reader::new(BufReader::new(file)).map_err(Into::into)
+    grenad::Reader::new(file).map_err(Into::into)
 }

 pub unsafe fn as_cloneable_grenad(
-    reader: &grenad::Reader<BufReader<File>>,
+    reader: &grenad::Reader<File>,
 ) -> Result<grenad::Reader<CursorClonableMmap>> {
-    let file = reader.get_ref().get_ref();
+    let file = reader.get_ref();
    let mmap = memmap2::Mmap::map(file)?;
    let cursor = io::Cursor::new(ClonableMmap::from(mmap));
    let reader = grenad::Reader::new(cursor)?;
@@ -91,8 +89,8 @@ where
    fn merge(self, merge_fn: MergeFn, indexer: &GrenadParameters) -> Result<Self::Output>;
 }

-impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
-    type Output = grenad::Reader<BufReader<File>>;
+impl MergeableReader for Vec<grenad::Reader<File>> {
+    type Output = grenad::Reader<File>;

    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
        let mut merger = MergerBuilder::new(merge_fn);
@@ -101,8 +99,8 @@ impl MergeableReader for Vec<grenad::Reader<BufReader<File>>> {
    }
 }

-impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
-    type Output = (grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>);
+impl MergeableReader for Vec<(grenad::Reader<File>, grenad::Reader<File>)> {
+    type Output = (grenad::Reader<File>, grenad::Reader<File>);

    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
        let mut m1 = MergerBuilder::new(merge_fn);
@@ -127,7 +125,7 @@ impl<R: io::Read + io::Seek> MergerBuilder<R> {
        Ok(())
    }

-    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<BufReader<File>>> {
+    fn finish(self, params: &GrenadParameters) -> Result<grenad::Reader<File>> {
        let merger = self.0.build();
        let mut writer = create_writer(
            params.chunk_compression_type,
@@ -178,7 +176,7 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
    reader: grenad::Reader<R>,
    indexer: GrenadParameters,
    documents_chunk_size: usize,
-) -> Result<impl Iterator<Item = Result<grenad::Reader<BufReader<File>>>>> {
+) -> Result<impl Iterator<Item = Result<grenad::Reader<File>>>> {
    let mut continue_reading = true;
    let mut cursor = reader.into_cursor()?;

--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -316,12 +316,6 @@ where
        let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors");

        let stop_words = self.index.stop_words(self.wtxn)?;
-        let separators = self.index.allowed_separators(self.wtxn)?;
-        let separators: Option<Vec<_>> =
-            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
-        let dictionary = self.index.dictionary(self.wtxn)?;
-        let dictionary: Option<Vec<_>> =
-            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;

        let pool_params = GrenadParameters {
@@ -359,8 +353,6 @@ where
                    geo_fields_ids,
                    vectors_field_id,
                    stop_words,
-                    separators.as_deref(),
-                    dictionary.as_deref(),
                    max_positions_per_attributes,
                    exact_attributes,
                )
@@ -2550,25 +2542,6 @@ mod tests {
        db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
    }

-    /// Index multiple different number of vectors in documents.
-    /// Vectors must be of the same length.
-    #[test]
-    fn test_multiple_vectors() {
-        let index = TempIndex::new();
-
-        index.add_documents(documents!([{"id": 0, "_vectors": [[0, 1, 2], [3, 4, 5]] }])).unwrap();
-        index.add_documents(documents!([{"id": 1, "_vectors": [6, 7, 8] }])).unwrap();
-        index
-            .add_documents(
-                documents!([{"id": 2, "_vectors": [[9, 10, 11], [12, 13, 14], [15, 16, 17]] }]),
-            )
-            .unwrap();
-
-        let rtxn = index.read_txn().unwrap();
-        let res = index.search(&rtxn).vector([0.0, 1.0, 2.0]).execute().unwrap();
-        assert_eq!(res.documents_ids.len(), 3);
-    }
-
    #[test]
    fn reproduce_the_bug() {
        /*
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@@ -659,10 +659,8 @@ impl<'a, 'i> Transform<'a, 'i> {
            new_documents_ids: self.new_documents_ids,
            replaced_documents_ids: self.replaced_documents_ids,
            documents_count: self.documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents,
+            flattened_documents,
        })
    }

@@ -781,10 +779,8 @@ impl<'a, 'i> Transform<'a, 'i> {
            new_documents_ids: documents_ids,
            replaced_documents_ids: RoaringBitmap::default(),
            documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents,
+            flattened_documents,
        };

        let new_facets = output.compute_real_facets(wtxn, self.index)?;
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -2,7 +2,7 @@ use std::borrow::Cow;
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io;

 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
@@ -27,22 +27,22 @@ pub(crate) enum TypedChunk {
    FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
    FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
    Documents(grenad::Reader<CursorClonableMmap>),
-    FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
+    FieldIdWordcountDocids(grenad::Reader<File>),
    NewDocumentsIds(RoaringBitmap),
    WordDocids {
-        word_docids_reader: grenad::Reader<BufReader<File>>,
-        exact_word_docids_reader: grenad::Reader<BufReader<File>>,
+        word_docids_reader: grenad::Reader<File>,
+        exact_word_docids_reader: grenad::Reader<File>,
    },
-    WordPositionDocids(grenad::Reader<BufReader<File>>),
-    WordFidDocids(grenad::Reader<BufReader<File>>),
-    WordPairProximityDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetExistsDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetIsNullDocids(grenad::Reader<BufReader<File>>),
-    FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
-    GeoPoints(grenad::Reader<BufReader<File>>),
-    VectorPoints(grenad::Reader<BufReader<File>>),
+    WordPositionDocids(grenad::Reader<File>),
+    WordFidDocids(grenad::Reader<File>),
+    WordPairProximityDocids(grenad::Reader<File>),
+    FieldIdFacetStringDocids(grenad::Reader<File>),
+    FieldIdFacetNumberDocids(grenad::Reader<File>),
+    FieldIdFacetExistsDocids(grenad::Reader<File>),
+    FieldIdFacetIsNullDocids(grenad::Reader<File>),
+    FieldIdFacetIsEmptyDocids(grenad::Reader<File>),
+    GeoPoints(grenad::Reader<File>),
+    VectorPoints(grenad::Reader<File>),
    ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
 }

--- a/milli/src/update/prefix_word_pairs/mod.rs
+++ b/milli/src/update/prefix_word_pairs/mod.rs
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 use std::collections::HashSet;
-use std::io::{BufReader, BufWriter};
+use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;
@@ -119,9 +119,9 @@ pub fn insert_into_database(
 pub fn write_into_lmdb_database_without_merging(
    wtxn: &mut heed::RwTxn,
    database: heed::PolyDatabase,
-    writer: grenad::Writer<BufWriter<std::fs::File>>,
+    writer: grenad::Writer<std::fs::File>,
 ) -> Result<()> {
-    let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
+    let file = writer.into_inner()?;
    let reader = grenad::Reader::new(BufReader::new(file))?;
    if database.is_empty(wtxn)? {
        let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -1,4 +1,4 @@
-use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BTreeSet, HashMap, HashSet};
 use std::result::Result as StdResult;

 use charabia::{Normalize, Tokenizer, TokenizerBuilder};
@@ -112,11 +112,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
    sortable_fields: Setting<HashSet<String>>,
    criteria: Setting<Vec<Criterion>>,
    stop_words: Setting<BTreeSet<String>>,
-    non_separator_tokens: Setting<BTreeSet<String>>,
-    separator_tokens: Setting<BTreeSet<String>>,
-    dictionary: Setting<BTreeSet<String>>,
    distinct_field: Setting<String>,
-    synonyms: Setting<BTreeMap<String, Vec<String>>>,
+    synonyms: Setting<HashMap<String, Vec<String>>>,
    primary_key: Setting<String>,
    authorize_typos: Setting<bool>,
    min_word_len_two_typos: Setting<u8>,
@@ -144,9 +141,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            sortable_fields: Setting::NotSet,
            criteria: Setting::NotSet,
            stop_words: Setting::NotSet,
-            non_separator_tokens: Setting::NotSet,
-            separator_tokens: Setting::NotSet,
-            dictionary: Setting::NotSet,
            distinct_field: Setting::NotSet,
            synonyms: Setting::NotSet,
            primary_key: Setting::NotSet,
@@ -211,39 +205,6 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
    }

-    pub fn reset_non_separator_tokens(&mut self) {
-        self.non_separator_tokens = Setting::Reset;
-    }
-
-    pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
-        self.non_separator_tokens = if non_separator_tokens.is_empty() {
-            Setting::Reset
-        } else {
-            Setting::Set(non_separator_tokens)
-        }
-    }
-
-    pub fn reset_separator_tokens(&mut self) {
-        self.separator_tokens = Setting::Reset;
-    }
-
-    pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
-        self.separator_tokens = if separator_tokens.is_empty() {
-            Setting::Reset
-        } else {
-            Setting::Set(separator_tokens)
-        }
-    }
-
-    pub fn reset_dictionary(&mut self) {
-        self.dictionary = Setting::Reset;
-    }
-
-    pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
-        self.dictionary =
-            if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
-    }
-
    pub fn reset_distinct_field(&mut self) {
        self.distinct_field = Setting::Reset;
    }
@@ -256,7 +217,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        self.synonyms = Setting::Reset;
    }

-    pub fn set_synonyms(&mut self, synonyms: BTreeMap<String, Vec<String>>) {
+    pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
        self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) }
    }

@@ -491,89 +452,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        }
    }

-    fn update_non_separator_tokens(&mut self) -> Result<bool> {
-        let changes = match self.non_separator_tokens {
-            Setting::Set(ref non_separator_tokens) => {
-                let current = self.index.non_separator_tokens(self.wtxn)?;
-
-                // Does the new list differ from the previous one?
-                if current.map_or(true, |current| &current != non_separator_tokens) {
-                    self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
-                    true
-                } else {
-                    false
-                }
-            }
-            Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
-            Setting::NotSet => false,
-        };
-
-        // the synonyms must be updated if non separator tokens have been updated.
-        if changes && self.synonyms == Setting::NotSet {
-            self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
-        }
-
-        Ok(changes)
-    }
-
-    fn update_separator_tokens(&mut self) -> Result<bool> {
-        let changes = match self.separator_tokens {
-            Setting::Set(ref separator_tokens) => {
-                let current = self.index.separator_tokens(self.wtxn)?;
-
-                // Does the new list differ from the previous one?
-                if current.map_or(true, |current| &current != separator_tokens) {
-                    self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
-                    true
-                } else {
-                    false
-                }
-            }
-            Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
-            Setting::NotSet => false,
-        };
-
-        // the synonyms must be updated if separator tokens have been updated.
-        if changes && self.synonyms == Setting::NotSet {
-            self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
-        }
-
-        Ok(changes)
-    }
-
-    fn update_dictionary(&mut self) -> Result<bool> {
-        let changes = match self.dictionary {
-            Setting::Set(ref dictionary) => {
-                let current = self.index.dictionary(self.wtxn)?;
-
-                // Does the new list differ from the previous one?
-                if current.map_or(true, |current| &current != dictionary) {
-                    self.index.put_dictionary(self.wtxn, dictionary)?;
-                    true
-                } else {
-                    false
-                }
-            }
-            Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
-            Setting::NotSet => false,
-        };
-
-        // the synonyms must be updated if dictionary has been updated.
-        if changes && self.synonyms == Setting::NotSet {
-            self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
-        }
-
-        Ok(changes)
-    }
-
    fn update_synonyms(&mut self) -> Result<bool> {
        match self.synonyms {
-            Setting::Set(ref user_synonyms) => {
+            Setting::Set(ref synonyms) => {
                fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec<String> {
                    tokenizer
                        .tokenize(text)
                        .filter_map(|token| {
-                            if token.is_word() && !token.lemma().is_empty() {
+                            if token.is_word() {
                                Some(token.lemma().to_string())
                            } else {
                                None
@@ -587,39 +473,19 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                if let Some(ref stop_words) = stop_words {
                    builder.stop_words(stop_words);
                }
-
-                let separators = self.index.allowed_separators(self.wtxn)?;
-                let separators: Option<Vec<_>> =
-                    separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
-                if let Some(ref separators) = separators {
-                    builder.separators(separators);
-                }
-
-                let dictionary = self.index.dictionary(self.wtxn)?;
-                let dictionary: Option<Vec<_>> =
-                    dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
-                if let Some(ref dictionary) = dictionary {
-                    builder.words_dict(dictionary);
-                }
-
                let tokenizer = builder.build();

                let mut new_synonyms = HashMap::new();
-                for (word, synonyms) in user_synonyms {
+                for (word, synonyms) in synonyms {
                    // Normalize both the word and associated synonyms.
                    let normalized_word = normalize(&tokenizer, word);
-                    let normalized_synonyms: Vec<_> = synonyms
-                        .iter()
-                        .map(|synonym| normalize(&tokenizer, synonym))
-                        .filter(|synonym| !synonym.is_empty())
-                        .collect();
+                    let normalized_synonyms =
+                        synonyms.iter().map(|synonym| normalize(&tokenizer, synonym));

                    // Store the normalized synonyms under the normalized word,
                    // merging the possible duplicate words.
-                    if !normalized_word.is_empty() && !normalized_synonyms.is_empty() {
-                        let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
-                        entry.extend(normalized_synonyms.into_iter());
-                    }
+                    let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new);
+                    entry.extend(normalized_synonyms);
                }

                // Make sure that we don't have duplicate synonyms.
@@ -631,7 +497,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                let old_synonyms = self.index.synonyms(self.wtxn)?;

                if new_synonyms != old_synonyms {
-                    self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
+                    self.index.put_synonyms(self.wtxn, &new_synonyms)?;
                    Ok(true)
                } else {
                    Ok(false)
@@ -891,17 +757,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        let faceted_updated = old_faceted_fields != new_faceted_fields;

        let stop_words_updated = self.update_stop_words()?;
-        let non_separator_tokens_updated = self.update_non_separator_tokens()?;
-        let separator_tokens_updated = self.update_separator_tokens()?;
-        let dictionary_updated = self.update_dictionary()?;
        let synonyms_updated = self.update_synonyms()?;
        let searchable_updated = self.update_searchable()?;
        let exact_attributes_updated = self.update_exact_attributes()?;

        if stop_words_updated
-            || non_separator_tokens_updated
-            || separator_tokens_updated
-            || dictionary_updated
            || faceted_updated
            || synonyms_updated
            || searchable_updated
@@ -918,7 +778,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 mod tests {
    use big_s::S;
    use heed::types::ByteSlice;
-    use maplit::{btreemap, btreeset, hashset};
+    use maplit::{btreeset, hashmap, hashset};

    use super::*;
    use crate::error::Error;
@@ -1384,7 +1244,7 @@ mod tests {
        // In the same transaction provide some synonyms
        index
            .update_settings_using_wtxn(&mut wtxn, |settings| {
-                settings.set_synonyms(btreemap! {
+                settings.set_synonyms(hashmap! {
                    "blini".to_string() => vec!["crepes".to_string()],
                    "super like".to_string() => vec!["love".to_string()],
                    "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
@@ -1427,43 +1287,6 @@ mod tests {
        assert!(result.documents_ids.is_empty());
    }

-    #[test]
-    fn thai_synonyms() {
-        let mut index = TempIndex::new();
-        index.index_documents_config.autogenerate_docids = true;
-
-        let mut wtxn = index.write_txn().unwrap();
-        // Send 3 documents with ids from 1 to 3.
-        index
-            .add_documents_using_wtxn(
-                &mut wtxn,
-                documents!([
-                    { "name": "ยี่ปุ่น" },
-                    { "name": "ญี่ปุ่น" },
-                ]),
-            )
-            .unwrap();
-
-        // In the same transaction provide some synonyms
-        index
-            .update_settings_using_wtxn(&mut wtxn, |settings| {
-                settings.set_synonyms(btreemap! {
-                    "japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")],
-                });
-            })
-            .unwrap();
-        wtxn.commit().unwrap();
-
-        // Ensure synonyms are effectively stored
-        let rtxn = index.read_txn().unwrap();
-        let synonyms = index.synonyms(&rtxn).unwrap();
-        assert!(!synonyms.is_empty()); // at this point the index should return something
-
-        // Check that we can use synonyms
-        let result = index.search(&rtxn).query("japanese").execute().unwrap();
-        assert_eq!(result.documents_ids.len(), 2);
-    }
-
    #[test]
    fn setting_searchable_recomputes_other_settings() {
        let index = TempIndex::new();
@@ -1717,9 +1540,6 @@ mod tests {
                    sortable_fields,
                    criteria,
                    stop_words,
-                    non_separator_tokens,
-                    separator_tokens,
-                    dictionary,
                    distinct_field,
                    synonyms,
                    primary_key,
@@ -1738,9 +1558,6 @@ mod tests {
                assert!(matches!(sortable_fields, Setting::NotSet));
                assert!(matches!(criteria, Setting::NotSet));
                assert!(matches!(stop_words, Setting::NotSet));
-                assert!(matches!(non_separator_tokens, Setting::NotSet));
-                assert!(matches!(separator_tokens, Setting::NotSet));
-                assert!(matches!(dictionary, Setting::NotSet));
                assert!(matches!(distinct_field, Setting::NotSet));
                assert!(matches!(synonyms, Setting::NotSet));
                assert!(matches!(primary_key, Setting::NotSet));
--- a/milli/tests/search/distinct.rs
+++ b/milli/tests/search/distinct.rs
@@ -8,7 +8,7 @@ use Criterion::*;
 use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

 macro_rules! test_distinct {
-    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
+    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
@@ -27,7 +27,6 @@ macro_rules! test_distinct {
            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit($limit);
-            search.offset($offset);
            search.exhaustive_number_hits($exhaustive);

            search.terms_matching_strategy(TermsMatchingStrategy::default());
@@ -48,7 +47,6 @@ macro_rules! test_distinct {
                            Some(d.id)
                        }
                    })
-                    .skip($offset)
                    .take($limit)
                    .collect();

@@ -63,7 +61,6 @@ test_distinct!(
    tag,
    true,
    1,
-    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    3
 );
@@ -72,7 +69,6 @@ test_distinct!(
    asc_desc_rank,
    true,
    1,
-    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    7
 );
@@ -81,7 +77,6 @@ test_distinct!(
    asc_desc_rank,
    true,
    0,
-    0,
    vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
    7
 );
@@ -91,7 +86,6 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    3
 );
@@ -100,7 +94,6 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    7
 );
@@ -109,7 +102,6 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words],
    3
 );
@@ -118,7 +110,6 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words],
    7
 );
@@ -127,7 +118,6 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Typo],
    3
 );
@@ -136,7 +126,6 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Typo],
    7
 );
@@ -145,7 +134,6 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Proximity],
    3
 );
@@ -154,7 +142,6 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Proximity],
    7
 );
@@ -163,7 +150,6 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Attribute],
    3
 );
@@ -172,7 +158,6 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Attribute],
    7
 );
@@ -181,7 +166,6 @@ test_distinct!(
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Exactness],
    3
 );
@@ -190,47 +174,6 @@ test_distinct!(
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
-    0,
    vec![Words, Exactness],
    7
 );
-test_distinct!(
-    // testing: https://github.com/meilisearch/meilisearch/issues/4078
-    distinct_string_limit_and_offset,
-    tag,
-    false,
-    EXTERNAL_DOCUMENTS_IDS.len(),
-    1,
-    vec![],
-    2
-);
-test_distinct!(
-    // testing: https://github.com/meilisearch/meilisearch/issues/4078
-    exhaustive_distinct_string_limit_and_offset,
-    tag,
-    true,
-    1,
-    2,
-    vec![],
-    1
-);
-test_distinct!(
-    // testing: https://github.com/meilisearch/meilisearch/issues/4078
-    distinct_number_limit_and_offset,
-    asc_desc_rank,
-    false,
-    EXTERNAL_DOCUMENTS_IDS.len(),
-    2,
-    vec![],
-    5
-);
-test_distinct!(
-    // testing: https://github.com/meilisearch/meilisearch/issues/4078
-    exhaustive_distinct_number_limit_and_offset,
-    asc_desc_rank,
-    true,
-    2,
-    4,
-    vec![],
-    3
-);
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@@ -5,7 +5,7 @@ use std::io::Cursor;
 use big_s::S;
 use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
-use maplit::{btreemap, hashset};
+use maplit::{hashmap, hashset};
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
 use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
@@ -51,7 +51,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
        S("tag"),
        S("asc_desc_rank"),
    });
-    builder.set_synonyms(btreemap! {
+    builder.set_synonyms(hashmap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
--- a/permissive-json-pointer/src/lib.rs
+++ b/permissive-json-pointer/src/lib.rs
@@ -186,16 +186,12 @@ fn create_value(value: &Document, mut selectors: HashSet<&str>) -> Document {
                    let array = create_array(array, &sub_selectors);
                    if !array.is_empty() {
                        new_value.insert(key.to_string(), array.into());
-                    } else {
-                        new_value.insert(key.to_string(), Value::Array(vec![]));
                    }
                }
                Value::Object(object) => {
                    let object = create_value(object, sub_selectors);
                    if !object.is_empty() {
                        new_value.insert(key.to_string(), object.into());
-                    } else {
-                        new_value.insert(key.to_string(), Value::Object(Map::new()));
                    }
                }
                _ => (),
@@ -215,8 +211,6 @@ fn create_array(array: &[Value], selectors: &HashSet<&str>) -> Vec<Value> {
                let array = create_array(array, selectors);
                if !array.is_empty() {
                    res.push(array.into());
-                } else {
-                    res.push(Value::Array(vec![]));
                }
            }
            Value::Object(object) => {
@@ -643,24 +637,6 @@ mod tests {
        );
    }

-    #[test]
-    fn empty_array_object_return_empty() {
-        let value: Value = json!({
-            "array": [],
-            "object": {},
-        });
-        let value: &Document = value.as_object().unwrap();
-
-        let res: Value = select_values(value, vec!["array.name", "object.name"]).into();
-        assert_eq!(
-            res,
-            json!({
-                "array": [],
-                "object": {},
-            })
-        );
-    }
-
    #[test]
    fn all_conflict_variation() {
        let value: Value = json!({