Compare commits

..

176 Commits

Author SHA1 Message Date
fa27bf3513 fix clippy 2023-11-29 15:04:41 +01:00
09ebf7428b stream and chunk the data 2023-11-29 14:27:50 +01:00
76e0248cdb gzip the tasks 2023-11-29 13:09:04 +01:00
7f0abaf582 parse the url correctly 2023-11-28 16:28:11 +01:00
6f135457f8 update and fix the test 2023-11-28 15:55:48 +01:00
41f3a30b0b return a task view instead of a task 2023-11-28 15:08:13 +01:00
9090e0fe9d add a first working test with actixweb 2023-11-28 14:47:07 +01:00
b3098b9d9a start writing a test with actix but it doesn't work 2023-11-28 14:01:44 +01:00
9b1de777de Implement the webhook 2023-11-28 11:40:09 +01:00
00f0711207 add the option 2023-11-27 15:22:44 +01:00
5751f5c640 fix puffin in the index scheduler 2023-11-27 15:18:33 +01:00
3d23b388bc Merge #4231
4231: Fixed payload limit setting being ignored for delete documents by batch r=Kerollmops a=Karribalu


# Pull Request

## Related issue
Fixes #4224

## What does this PR do?
- Added `http_payload_size_limit` to `JsonConfig` to allow deleting documents in batches with a payload larger than 2 MB, the default limit of actix-web's `JsonConfig` (see the sketch below).
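
A minimal sketch of the underlying actix-web mechanism, with a hypothetical route and an illustrative 100 MiB value rather than Meilisearch's actual wiring: `JsonConfig::limit` raises the JSON extractor's payload cap above its 2 MB default.

```rust
use actix_web::{web, App, HttpResponse, HttpServer};

// Hypothetical handler, for illustration only.
async fn delete_batch(ids: web::Json<Vec<String>>) -> HttpResponse {
    HttpResponse::Accepted().body(format!("received {} ids", ids.len()))
}

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    HttpServer::new(|| {
        // 100 MiB is an example value, not the limit Meilisearch ships with.
        let json_config = web::JsonConfig::default().limit(100 * 1024 * 1024);
        App::new()
            .app_data(json_config)
            .route("/documents/delete-batch", web::post().to(delete_batch))
    })
    .bind(("127.0.0.1", 8080))?
    .run()
    .await
}
```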

## PR checklist
Please check if your PR fulfills the following requirements:
- [Y] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [Y] Have you read the contributing guidelines?
- [Y] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: karribalu <karri.balu123456@gmail.com>
2023-11-27 09:26:21 +00:00
85626cff8e Fixed payload limit setting being ignored for delete documents by batch route 2023-11-25 18:41:16 +00:00
b366acdae6 Merge #4220
4220: Bring back changes from v1.5.0 into main r=dureuill a=Kerollmops

This will bring the fixes from v1.5.0 into main. [Following this guide](https://github.com/meilisearch/engine-team/blob/main/resources/meilisearch-release.md#after-the-release), I created a temporary branch to resolve the git conflicts and merge into main afterward.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-22 07:46:22 +00:00
7cb7e37ba8 Merge branch 'main' into tmp-release-v1.5.0 2023-11-21 16:30:46 +01:00
33b7c574ea Merge #4090
4090: Diff indexing r=ManyTheFish a=ManyTheFish

This pull request aims to reduce the indexing time by computing the difference between the data added to the index and the data removed from it before writing to LMDB.

## Why focus on reducing the writes to LMDB?

The indexing in Meilisearch is split into 3 main phases:
1) The computation or extraction of the data (multi-threaded)
2) The writing of the data to LMDB (mono-threaded)
3) The processing of the prefix databases (mono-threaded)

See below:
![Capture d’écran 2023-09-28 à 20 01 45](https://github.com/meilisearch/meilisearch/assets/6482087/51513162-7c39-4244-978b-2c6b60c43a56)

Because the writing is mono-threaded, it is a bottleneck in the indexing; reducing the number of writes to LMDB reduces the pressure on the main thread and should reduce the global time spent on the indexing.

## Give Feedback

We created [a dedicated discussion](https://github.com/meilisearch/meilisearch/discussions/4196) for users to try this new feature and to give feedback on bugs or performance issues.

## Technical approach
### Part 1: merge the addition and the deletion process
This part:
a) Aims to reduce the time spent indexing documents when only the filterable/sortable fields change, for example:
  - Updating the number of "likes" or "stars" of a song or a movie
  - Updating the "stock count" or the "price" of a product

b) Aims to reduce the time spent writing to LMDB, which should reduce the global indexing time on highly multi-threaded machines by easing the writing bottleneck.

c) Aims to reduce the average time spent deleting documents without having to keep the soft-deleted documents implementation

- [x] Create a preprocessing function that creates the diff-based documents chunk (`OBKV<fid, OBKV<AddDel, value>>`) (see the sketch after the technical concerns below)
  - [x] and clearly separate the faceted fields and the searchable fields in two different chunks
- Change the parameters of the input extractors to take an `OBKV<fid, OBKV<AddDel, value>>` instead of an `OBKV<fid, value>`:
  - [x] extract_docid_word_positions
  - [x] extract_geo_points
  - [x] extract_vector_points
  - [x] extract_fid_docid_facet_values
- Adapt the searchable extractors to the new diff-chunks
  - [x] extract_fid_word_count_docids
  - [x] extract_word_pair_proximity_docids
  - [x] extract_word_position_docids
  - [x] extract_word_docids
- Adapt the facet extractors to the new diff-chunks
  - [x] extract_facet_number_docids
  - [x] extract_facet_string_docids
  - [x] extract_fid_docid_facet_values
  - [x] FacetsUpdate
- [x] Adapt the prefix database extractors ⚠️ ⚠️ 
- [x] Make the LMDB writer remove the document_ids to delete at the same time the new document_ids are added
- [x] Remove document deletion pipeline
  - [x] remove `new_documents_ids` and `replaced_documents_ids` entirely
  - [x] reuse extracted external id from transform instead of re-extracting in `TypedChunks::Documents`
  - [x] Remove deletion pipeline after autobatcher
  - [x] remove autobatcher deletion pipeline
    - [x] everything uses `IndexOperation::DocumentOperation`
    - [x] repair deletion by internal id for filter by delete
    - [x] Improve the deletion via internal ids by avoiding iterating over the whole set of external document ids.  
- [x] Remove soft-deleted documents

#### FIXME

- [x] field distribution is not correctly updated after deletion
- [x] missing documents in the tests of tokenizer_customization

### Part 2: Only compute the documents field by field
This part aims to reduce the global indexing time for any kind of partial document modification on any size of machine, from mono-threaded ones to highly multi-threaded ones.

- [ ] Make the preprocessing function only send the fields that changed to the extractors
- [ ] remove the `word_docids` and `exact_word_docids` database and adapt the search (⚠️ could impact the search performances)
- [ ] replace the `word_pair_proximity_docids` database with a `word_pair_proximity_fid_docids` database and adapt the search (⚠️ could impact the search performances)
- [ ] Adapt the prefix database extractors ⚠️ ⚠️

## Technical Concerns
- The Part 1 implementation could increase the indexing time on the smallest machines (with few threads) by increasing the extraction time (multi-threaded) more than it reduces the writing time (mono-threaded)
- The Part 2 implementation needs to change the databases, which could have a significant impact on search performance
- The prefix databases are a bit special to process and may be a pain to adapt to the difference-based indexing
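
As a rough illustration of the diff-based document format mentioned in Part 1, here is a minimal, self-contained sketch; it uses plain `BTreeMap`s in place of milli's nested obkv buffers and made-up field values, so it is a conceptual aid rather than the actual implementation.

```rust
use std::collections::BTreeMap;

// Conceptual sketch only: milli stores this as nested obkv buffers
// (`OBKV<fid, OBKV<DelAdd, value>>`); plain maps and string values are used
// here for readability.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum DelAdd {
    Deletion,
    Addition,
}

type FieldId = u16;
type Document = BTreeMap<FieldId, String>;
type DiffDocument = BTreeMap<FieldId, BTreeMap<DelAdd, String>>;

// Build a diff-based document: record the old value under `Deletion` and the
// new value under `Addition`, skipping fields whose value did not change so
// the extractors (and ultimately the LMDB writer) have less work to do.
fn diff_document(old: &Document, new: &Document) -> DiffDocument {
    let mut diff = DiffDocument::new();
    for fid in old.keys().chain(new.keys()) {
        let (del, add) = (old.get(fid), new.get(fid));
        if del == add {
            continue; // unchanged field: nothing to write
        }
        let entry = diff.entry(*fid).or_default();
        if let Some(value) = del {
            entry.insert(DelAdd::Deletion, value.clone());
        }
        if let Some(value) = add {
            entry.insert(DelAdd::Addition, value.clone());
        }
    }
    diff
}

fn main() {
    let old = Document::from([(0, "Movie".to_string()), (1, "1200 likes".to_string())]);
    let new = Document::from([(0, "Movie".to_string()), (1, "1201 likes".to_string())]);
    // Only field 1 (the "likes" count) appears in the diff.
    println!("{:?}", diff_document(&old, &new));
}
```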

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-21 09:44:38 +00:00
d3575fb028 Make into_del_add_obkv parameters more human readable 2023-11-20 16:10:39 +01:00
39cbb499c2 Small fixes 2023-11-20 10:20:39 +01:00
ebef6bc24d Simplify documents database writing 2023-11-20 10:14:57 +01:00
d59b7db8d0 remove unused code 2023-11-20 10:10:45 +01:00
263e825619 Fix typos in comments 2023-11-20 10:06:29 +01:00
69354a6144 Add the benchmark name to the bot message 2023-11-15 13:56:54 +01:00
b0adc73ce6 Merge pull request #4207 from meilisearch/diff-indexing-prefix-databases
Diff indexing prefix databases
2023-11-14 16:04:05 +01:00
2b5d9042d1 Merge #4208
4208: Makes the dump cancellable r=Kerollmops a=irevoire

# Pull Request

Make the dump tasks cancellable even when they have already started processing.

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4157
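
A minimal sketch, under assumed names, of the cooperative-cancellation pattern this relies on: the long-running dump loop polls a shared stop flag between items and returns a dedicated abort error that the scheduler can handle.

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// Stand-in error type; the real scheduler uses a dedicated `Error::AbortedTask`
// variant that the tick loop treats like an aborted indexation.
#[derive(Debug)]
enum Error {
    AbortedTask,
}

// The dump loop checks the shared stop flag between items and bails out early
// instead of finishing the dump.
fn dump_tasks(tasks: &[&str], must_stop: &AtomicBool) -> Result<usize, Error> {
    let mut written = 0;
    for task in tasks {
        if must_stop.load(Ordering::Relaxed) {
            return Err(Error::AbortedTask);
        }
        // ... serialize `task` into the dump here ...
        let _ = task;
        written += 1;
    }
    Ok(written)
}

fn main() {
    let must_stop = AtomicBool::new(false);
    must_stop.store(true, Ordering::Relaxed); // a cancellation task was registered
    assert!(matches!(dump_tasks(&["t0", "t1"], &must_stop), Err(Error::AbortedTask)));
}
```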


Co-authored-by: Tamo <tamo@meilisearch.com>
2023-11-14 13:31:45 +00:00
5b57fbab08 makes the dump cancellable 2023-11-14 11:23:13 +01:00
72d3fa4898 Merge #4203
4203: Extract external document docids from docs on deletion by filter r=Kerollmops a=dureuill

This fixes some of the performance regression observed on `diff-indexing` when doing delete-by-filter with a filter matching many documents.

To delete 19 768 771 documents (hackernews dataset, all documents matching `type = comment`), here are the observed times:

|branch (commit sha1sum)|time|speed-down factor (lower is better)|
|--|--|--|
|`main` (48865470d7)|1212.885536s (~20min)|x1.0 (baseline)|
|`diff-indexing` (523519fdbf)|5385.550543s (90min)|x4.44|
|**`diff-indexing-extract-primary-key`**(f8289cd974)|2582.323324s (43min) | x2.13|

So we're still suffering a speed-down of x2.13, but that's much better than x4.44.

---

Changes:

- Refactor the logic of PrimaryKey extraction to a struct
- Add a trait to abstract the extraction of field id from a name between `DocumentBatch` and `FieldIdMap`.
- Add `Index::external_id_of` to get the external ids of a bitmap of internal ids (see the sketch after this list).
- Use this new method to add new Transform and Batch methods to remove documents that are known to be from the DB.
- Modify delete-by-filter to use the new method
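
A conceptual sketch of that lookup direction, assuming a plain `HashMap` in place of milli's LMDB-backed mapping: only the internal ids selected for deletion are resolved, instead of iterating over the whole set of external document ids.

```rust
use std::collections::HashMap;

use roaring::RoaringBitmap;

// A HashMap stands in for the internal -> external mapping; the point is the
// lookup direction, resolving only the internal ids selected for deletion
// rather than scanning every external document id.
fn external_ids_of(
    internal_to_external: &HashMap<u32, String>,
    to_delete: &RoaringBitmap,
) -> Vec<String> {
    to_delete
        .iter()
        .filter_map(|internal_id| internal_to_external.get(&internal_id).cloned())
        .collect()
}

fn main() {
    let mapping = HashMap::from([(0, "doc-a".to_string()), (7, "doc-b".to_string())]);
    let mut to_delete = RoaringBitmap::new();
    to_delete.insert(7);
    assert_eq!(external_ids_of(&mapping, &to_delete), vec!["doc-b".to_string()]);
}
```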

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 13:02:10 +00:00
772964125d Factor removal of document from DB 2023-11-13 13:51:22 +01:00
378deb0bef Rename trait 2023-11-13 13:38:36 +01:00
1f36410541 Update tests 2023-11-13 13:36:39 +01:00
b11f85a635 Merge #4205
4205: Prevent search hang on the processing index r=Kerollmops a=dureuill

Fixes #4206, an issue originally [reported on Discord](https://discord.com/channels/1006923006964154428/1148983671026618579/1148983671026618579) where having parallel search requests on more indexes than the index cache capacity would cause search requests on the currently updating index to hang until the index is done updating.

## Test setup

- Create 20 empty indexes by sending settings to them
- Repeatedly send placeholder search requests to each of the indexes in a loop
- Create another index and send a significant batch of documents to index
- Attempt to perform a search request on that last index
  - Before this PR, the search request hangs while the index update task is processing
  - After this PR, the search request responds immediately even while the index update task is processing

## Changes

- When getting the handle to an index for some potentially long-running batches of tasks, save it in the index scheduler.
- Drop the handle from the index scheduler when the task is done so that we don't leak indexes.
- When getting an index from outside the task queue processor, check if there is such a handle matching the requested index. If so, skip the cache entirely and clone the handle (sketched below).
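
A stripped-down sketch of that handle, with a stand-in `Index` type and invented method names, just to show the shape of the shared slot and the cache bypass:

```rust
use std::sync::{Arc, RwLock};

// Stand-in for milli's `Index` handle (an LMDB environment in the real code).
#[derive(Clone)]
struct Index(String);

#[derive(Default, Clone)]
struct Scheduler {
    // Name and handle of the index a long-running batch is working on, if any.
    currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
}

impl Scheduler {
    // Called by the batch processor before it starts a long index operation.
    fn start_update(&self, name: &str, index: Index) {
        *self.currently_updating_index.write().unwrap() = Some((name.to_string(), index));
    }

    // Called once the batch is done so the handle is not leaked.
    fn finish_update(&self) {
        *self.currently_updating_index.write().unwrap() = None;
    }

    // Search path: if the requested index is the one being updated, clone the
    // saved handle and skip the bounded index cache entirely.
    fn index(&self, name: &str) -> Option<Index> {
        if let Some((current, index)) = self.currently_updating_index.read().unwrap().as_ref() {
            if current == name {
                return Some(index.clone());
            }
        }
        None // in the real scheduler this falls back to the index mapper
    }
}

fn main() {
    let scheduler = Scheduler::default();
    scheduler.start_update("movies", Index("movies-env".to_string()));
    assert!(scheduler.index("movies").is_some());
    scheduler.finish_update();
    assert!(scheduler.index("movies").is_none());
}
```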

Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 10:36:01 +00:00
a2d6dc8571 Fix typo, remove caching for the change of index 2023-11-13 10:44:36 +01:00
ee1701157f Merge #4204
4204: Throw error when the vector search is sent with the wrong size r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #4201 


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2023-11-13 09:43:20 +00:00
8c649d8061 Throw error when the vector search is sent with the wrong size 2023-11-13 09:57:42 +01:00
492fc086f0 cargo fmt 2023-11-12 21:53:11 +01:00
a2d0c73b41 Save the currently updating index so that the search can access it at all times 2023-11-10 10:52:03 +01:00
264b10ec20 Fixup documentation 2023-11-09 16:23:20 +01:00
825257da76 Use more efficient method for deletion in benchmarks 2023-11-09 16:13:15 +01:00
f8289cd974 Use it from delete-by-filter 2023-11-09 14:23:15 +01:00
3053e01c05 Batch::remove_documents_from_db_no_batch 2023-11-09 14:23:02 +01:00
b11c2afac0 Index::external_id_of 2023-11-09 14:22:43 +01:00
9cef800b2a Enrich uses the new type 2023-11-09 14:22:05 +01:00
db2fb86b8b Extract PrimaryKey logic to a type 2023-11-09 14:19:16 +01:00
882ab9cc85 remove warnings 2023-11-09 11:35:33 +01:00
5a9c96e1db Compute word integer prefix cache 2023-11-09 11:34:26 +01:00
70ce40828c Compute word docids prefix cache 2023-11-08 17:01:00 +01:00
688266c83e Remove word pair proximity prefix cache and compute it at search time 2023-11-08 14:16:01 +01:00
6dab826908 Reactivate prefix databases 2023-11-08 13:58:01 +01:00
1e2fbc6a42 revert "REVERT ME: ignore prefix pair databases tests"
This reverts commit 1b2ea6cf19.
2023-11-08 11:50:52 +01:00
523519fdbf Merge pull request #4195 from meilisearch/diff-indexing-remove-from-batch
Remove `IndexOperation::DocumentDeletion`
2023-11-08 10:29:49 +01:00
ef6fa10f7a Remove IndexOperation::DocumentDeletion 2023-11-06 12:16:15 +01:00
620fee35f9 Fix benches 2023-11-06 11:56:46 +01:00
cbaa54cafd Fix clippy issues 2023-11-06 11:19:31 +01:00
1bccf2079e Correctly mark non-tests as non-tests 2023-11-06 11:03:56 +01:00
1b2ea6cf19 REVERT ME: ignore prefix pair databases tests 2023-11-06 10:46:22 +01:00
1ad1fcc8c8 Remove all warnings 2023-11-06 10:31:14 +01:00
87610a5f98 Don't try to delete a document that is not in the database 2023-11-02 16:49:03 +01:00
2544bc1416 Merge pull request #4160 from meilisearch/diff-indexing-vector-points
Diff Indexing for the vector points
2023-11-02 16:01:51 +01:00
ff522c919d Fix the vector extractions for the diff indexing 2023-11-02 15:58:08 +01:00
1c39459cf4 Merge pull request #4179 from meilisearch/diff-indexing-fix-nested-primary-key
Diff indexing fix nested primary key
2023-11-02 15:39:50 +01:00
bf0651f23c Implement iter method on ExternalDocumentsIds 2023-11-02 15:38:00 +01:00
5b20e625f3 fix merge 2023-11-02 15:31:37 +01:00
bc51d6157a Fix transform reindexing path 2023-11-02 15:26:20 +01:00
1b4ff991c0 update typed chunks 2023-11-02 15:26:20 +01:00
4b64c33aa2 update vector extractor 2023-11-02 15:26:20 +01:00
12323d610e Change the original document sorter key from the internal docid to a concatenation of the internal and the external docid 2023-11-02 15:26:20 +01:00
44e9033b3a Merge pull request #4181 from meilisearch/diff-indexing-parallel-transform
Use rayon to sort entries in parallel
2023-11-02 15:16:10 +01:00
4d864f0702 Always sort internal Sorter entries in parallel 2023-11-02 14:47:43 +01:00
b10c060bf7 Cleanup TOML 2023-11-01 14:03:04 +01:00
e507ef5932 Slow the logging down 2023-11-01 13:49:32 +01:00
c71b1d33ae Sort entries using rayon in the transform sorters 2023-11-01 11:07:16 +01:00
0fc446c62f Add more timing logs to the Transform 2023-11-01 11:07:16 +01:00
0fb6acefc3 Add snapshots for facets 2023-10-31 17:11:08 +01:00
b1d1355b69 remove tests on soft-deleted 2023-10-31 16:36:27 +01:00
f19332466e Extract field value as values instead of Option<Value> 2023-10-31 16:36:27 +01:00
03ddb4f310 use deladd in facet update tests 2023-10-31 16:36:27 +01:00
c855cc2721 Remove unused test 2023-10-31 16:36:27 +01:00
da0503ef80 Fix document count 2023-10-31 16:36:27 +01:00
54f0ee1ed2 Merge #4167
4167: Introduce the `meilitool` command line interface r=Kerollmops a=Kerollmops

This PR introduces a small tool to help the Cloud team:
 - Clear the tasks queue by removing all the tasks
 - Dump a Meilisearch database without having to enqueue the task
 - Access this `meilitool` binary from the Docker Image
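
For illustration only, a hypothetical clap-based layout for such a tool; the flag and subcommand names below are assumptions, not meilitool's actual interface.

```rust
use clap::{Parser, Subcommand};

// Hypothetical maintenance CLI covering the two capabilities listed above.
#[derive(Parser)]
#[command(name = "meilitool")]
struct Cli {
    /// Path to the Meilisearch database directory (assumed flag name).
    #[arg(long, default_value = "data.ms")]
    db_path: String,
    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    /// Remove every task from the task queue (and its update files).
    ClearTaskQueue,
    /// Export a dump directly from the database, without enqueuing a task.
    ExportADump {
        /// Skip dumping enqueued and processing tasks.
        #[arg(long)]
        skip_enqueued_tasks: bool,
    },
}

fn main() {
    let cli = Cli::parse();
    match cli.command {
        Command::ClearTaskQueue => println!("clearing task queue in {}", cli.db_path),
        Command::ExportADump { skip_enqueued_tasks } => {
            println!("dumping {} (skip enqueued: {})", cli.db_path, skip_enqueued_tasks)
        }
    }
}
```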

## TODO
 - [x] Modify the Dockerfile to ship with this new tool (`@curquiza`, could you review that, please?)
 - [x] Clear the tasks queue by removing all the tasks
   - [x] Add more logs to explain what is happening
   - [x] Clear the `update_files` folder
 - [x] Dump a Meilisearch database without having to enqueue the task
   - [x] Add more logs to explain what is happening
   - [x] Introduce a flag to skip dumping enqueued and processing tasks.
   - [x] Dump the instance uid.
   - [x] Dump the keys.
   - [x] Dump the tasks with the update files.
   - [x] Dump the index documents and settings.
   - [ ] ~Dump the experimental features~

Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-31 14:05:22 +00:00
94206b0055 Update tests 2023-10-31 13:48:47 +01:00
b40253bf18 update snapshots 2023-10-31 10:30:48 +01:00
d8bf3f3fc2 Remove unused snapshots 2023-10-31 10:12:49 +01:00
9d59e8011a fix some tests 2023-10-31 10:08:36 +01:00
dad78cbf8d Bulk facet remove deletes keys from DB when value empty 2023-10-31 09:53:55 +01:00
4e91707a06 Rename test 2023-10-31 09:41:17 +01:00
de10f20732 Fix field distribution again 2023-10-30 17:47:22 +01:00
ce5647e730 Fix Dockerfile WORKDIR path 2023-10-30 17:27:59 +01:00
b57b818b67 Don't use the latest version of clap 2023-10-30 16:57:31 +01:00
f7ea94e5f4 Modify the Dockerfile to compile meilisearch and meilitool 2023-10-30 16:32:17 +01:00
be395c7944 Change order of arguments to tokenizer_builder 2023-10-30 16:26:29 +01:00
9fedd8101a Fix tests 2023-10-30 15:11:07 +01:00
54d07a8da3 Update field distribution taking into account both deletions and additions 2023-10-30 14:47:51 +01:00
53382bb1b8 Introduce a new flag to skip dumping enqueued/processing tasks 2023-10-30 14:32:10 +01:00
5b004a2583 Add more logs to the dump exporter 2023-10-30 14:31:55 +01:00
13416ccbf7 Introduce a new meilitool to help the cloud team 2023-10-30 14:30:20 +01:00
58690dfb19 Fix tests compilation after changes to ExternalDocumentsIds API 2023-10-30 13:34:07 +01:00
abf424ebfc Remove unused FromIterator 2023-10-30 11:41:56 +01:00
dfab6293c9 Use an LMDB database to store the external documents ids 2023-10-30 11:41:23 +01:00
fdf3f7f627 Fix facet distribution test 2023-10-30 11:41:23 +01:00
6260cff65f Actually delete documents from DB when the merge function says so 2023-10-30 11:41:22 +01:00
8e0d9c9a5e Recover delete_documents tests that were too eagerly deleted 2023-10-30 11:41:22 +01:00
ae4ec8ea55 Add delete_document_using_wtxn to TempIndex 2023-10-30 11:41:22 +01:00
652ac3052d use new iterator in batch 2023-10-30 11:41:22 +01:00
9a2dccc3bc Add iterator to find external ids of a bitmap of internal ids 2023-10-30 11:41:22 +01:00
a35988550c Fix some snapshots 2023-10-30 11:41:22 +01:00
e78281785c Actually execute the transform even if there are only documents to delete 2023-10-30 11:41:22 +01:00
3c15881818 Add simple delete test 2023-10-30 11:41:22 +01:00
73c06d31d9 snapshot always display stuff in consistent order 2023-10-30 11:41:22 +01:00
290e773d23 remove more warnings and fix some tests 2023-10-30 11:41:22 +01:00
fa6c7f65ca Add TmpIndex::delete_documents 2023-10-30 11:41:22 +01:00
113527f466 Remove soft-deleted related methods from Index 2023-10-30 11:41:22 +01:00
c534a1b687 Stop using delete documents pipeline in batch runner 2023-10-30 11:41:22 +01:00
2263dff02b Stop using removed delete pipelines almost everywhere 2023-10-30 11:41:22 +01:00
d651b3ef01 Remove delete documents files 2023-10-30 11:41:20 +01:00
762b0b47e6 Use deladd merging function in chunks mergers 2023-10-30 11:40:20 +01:00
01d5eedf2f Remove some warnings 2023-10-30 11:40:20 +01:00
073f89db79 Fix facet tests 2023-10-30 11:40:20 +01:00
8370fbc92b Fix snaps 2023-10-30 11:40:20 +01:00
85f42fbc03 Handle external to internal id mapping from TypedChunk::Documents 2023-10-30 11:40:20 +01:00
c6b3c18c85 WIP: Comment out document deletion in other pipelines than update
TODO: fix calls to DELETE route
2023-10-30 11:40:20 +01:00
bafeb892a7 Modify Index after changes to ExternalDocumentsIds 2023-10-30 11:40:20 +01:00
8fb221dae3 Refactor ExternalDocumentsIds
- Remove soft deleted
- Add apply method that takes a list of operations to encapsulate modifications to the external -> internal mapping
2023-10-30 11:40:20 +01:00
5be569e3e2 Update obkv 2023-10-30 11:40:20 +01:00
946c762d28 WIP: reset documents in TypedChunk::Documents 2023-10-30 11:40:20 +01:00
cda6ca1ee6 Remove TypedChunk::NewDocumentIds 2023-10-30 11:40:18 +01:00
696fcf4d18 Fix document insertion into LMDB 2023-10-30 11:39:31 +01:00
476e4d3dbe Use value buffer instead of the initial value when writing the final result in the sorter 2023-10-30 11:39:31 +01:00
576fa9c6da Remove useless comment 2023-10-30 11:39:31 +01:00
77dcbff6b2 Remove and Insert the DelAdd geo points 2023-10-30 11:39:31 +01:00
544440c363 Ignore geo fields when the Del and Add content is the same 2023-10-30 11:39:31 +01:00
a3dae4db9b Extract the geo fields DelAdd and generate a new DelAdd obkv with it 2023-10-30 11:39:31 +01:00
ba90a5ec0e update extract fid word count docids 2023-10-30 11:39:31 +01:00
b26dc9aabe Explanatory code comment 2023-10-30 11:39:31 +01:00
66abac9364 Use specialized KvReaderDelAdd type
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-10-30 11:39:31 +01:00
59f88c14b3 Simplify facet update after removing Index::faceted_documents_ids 2023-10-30 11:39:29 +01:00
14832cb324 Remove Index::faceted_documents_ids 2023-10-30 11:37:32 +01:00
04ec293024 Facet Incremental update 2023-10-30 11:37:30 +01:00
f67ff3a738 Facets Bulk update 2023-10-30 11:36:40 +01:00
560e8f5613 Introduce the CboRoaringBitmapCodec merge_deladd_into and use it 2023-10-30 11:34:55 +01:00
2d3f15f82c Introduce a function to only serialize the Add side of a DelAdd obkv 2023-10-30 11:34:55 +01:00
40186bf403 Rename FieldIdWordCountDocids correctly 2023-10-30 11:34:50 +01:00
87e3d27878 update extract word pair proximity to support deladd obkvs 2023-10-30 11:34:02 +01:00
6bcf8b4f8c update extract word position docids 2023-10-30 11:34:02 +01:00
46aa75abdb update extract word docids 2023-10-30 11:34:02 +01:00
2597bbd107 Make script language docids map take a tuple of roaring bitmaps expressing the deletions and the additions 2023-10-30 11:34:00 +01:00
e2bc054604 Update extract_facet_string_docids to support deladd obkvs 2023-10-30 11:32:36 +01:00
fcd3a1434d Update extract_facet_number_docids to support deladd obkvs 2023-10-30 11:31:04 +01:00
a82dee21e0 Rename docid_fid into fid_docid 2023-10-30 11:31:02 +01:00
bc45c1206d Implement all the facet extraction paths and simplify them 2023-10-30 11:29:08 +01:00
6ae4100f07 Generate the DelAdd for is_null, is_empty, and exists 2023-10-30 11:29:08 +01:00
0c47defeee Work on fid docid facet values rewrite 2023-10-30 11:29:06 +01:00
313b16bec2 Support diff indexing on extract_docid_word_positions 2023-10-30 11:24:19 +01:00
1dd97578a8 Make the transform struct return diff-based documents obkvs 2023-10-30 11:22:07 +01:00
f5ef69293b deactivate prefix dbs 2023-10-30 11:22:07 +01:00
1c5705c164 clean PR warnings 2023-10-30 11:22:05 +01:00
66c2c82a18 Split wpp in several sorters 2023-10-30 11:15:02 +01:00
28a8d0ccda Fix word pair proximity 2023-10-30 11:15:02 +01:00
96be85396d Use a vecDeque in wpp database 2023-10-30 11:15:02 +01:00
df9e5c8651 Generalize usage of CboRoaringBitmap codec to ease the use 2023-10-30 11:15:02 +01:00
b541d48847 Add buffer to the obkv writter 2023-10-30 11:15:02 +01:00
8ccf32d1a0 Compute word_fid_docids before word_docids and exact_word_docids 2023-10-30 11:15:02 +01:00
db1ca21231 add puffin in sorter into reeder function 2023-10-30 11:15:00 +01:00
11ea5acff9 Fix 2023-10-30 11:13:10 +01:00
8d77736a67 Fix fid_word_docids 2023-10-30 11:13:10 +01:00
748b333161 Add useful debug assert before key insertion in database 2023-10-30 11:13:10 +01:00
17b647dfe5 Wip 2023-10-30 11:13:08 +01:00
2614e7d9ca Merge #4174
4174: Fix warnings r=dureuill a=irevoire

Fix all the warnings found in the CI: https://github.com/meilisearch/meilisearch/actions/runs/6622576021/job/17988323623

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-10-30 10:12:54 +00:00
e7244aa485 fix warnings 2023-10-30 11:00:46 +01:00
9cacc82307 Merge #4169
4169: update charabia r=curquiza a=ManyTheFish

Update Charabia to v0.8.5 and add the new khmer tokenizer

Co-authored-by: ManyTheFish <many@meilisearch.com>
2023-10-26 17:21:30 +00:00
4c6fddb1cb update charabia 2023-10-26 17:01:10 +02:00
ca52021079 Merge #4154
4154: Update version for the next release (v1.5.0) in Cargo.toml r=curquiza a=meili-bot

⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging.

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
2023-10-23 12:00:50 +00:00
ee6f79d60b Update version for the next release (v1.5.0) in Cargo.toml 2023-10-23 11:49:07 +00:00
e4c24ca6a3 Merge #4151
4151: Bring back changes from v1.4.2 into `release-v1.5.0` r=dureuill a=curquiza

This will bring the fixes in v1.4.2 for v1.5.0 release

Co-authored-by: curquiza <curquiza@users.noreply.github.com>
Co-authored-by: Vivek Kumar <vivek.26@outlook.com>
Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
2023-10-23 10:11:11 +00:00
2bae9550c8 Add explanatory comment 2023-10-23 12:06:28 +02:00
32c78ac8b1 add/update tests when search with distinct attribute & pagination with no ranking 2023-10-23 12:06:27 +02:00
5fe7c4545a compute all candidates correctly when skipping 2023-10-23 12:02:45 +02:00
2042229927 Update version for the next release (v1.4.2) in Cargo.toml 2023-10-23 12:02:45 +02:00
228 changed files with 5957 additions and 7828 deletions

View File

@ -90,7 +90,8 @@ jobs:
set -x
export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
echo 'Here are your benchmarks diff 👊' >> body.txt
export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
echo '```' >> body.txt
./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
echo '```' >> body.txt

944
Cargo.lock generated

File diff suppressed because it is too large

View File

@ -2,6 +2,7 @@
resolver = "2"
members = [
"meilisearch",
"meilitool",
"meilisearch-types",
"meilisearch-auth",
"meili-snap",
@ -18,7 +19,7 @@ members = [
]
[workspace.package]
version = "1.4.1"
version = "1.5.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"

View File

@ -3,7 +3,7 @@ FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
WORKDIR /
ARG COMMIT_SHA
ARG COMMIT_DATE
@ -17,7 +17,7 @@ RUN set -eux; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
cargo build --release -p meilisearch -p meilitool
# Run
FROM alpine:3.16
@ -28,9 +28,10 @@ ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.
COPY --from=compiler /meilisearch/target/release/meilisearch /bin/meilisearch
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.
COPY --from=compiler /target/release/meilisearch /bin/meilisearch
COPY --from=compiler /target/release/meilitool /bin/meilitool
# To stay compatible with the older version of the container (pre v0.27.0) we're
# going to symlink the meilisearch binary in the path to `/meilisearch`
RUN ln -s /bin/meilisearch /meilisearch

View File

@ -6,9 +6,7 @@ use std::path::Path;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::update::{
DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::Index;
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
@ -266,17 +264,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
@ -613,17 +601,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
@ -875,22 +853,31 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
}
fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
let mut wtxn = index.write_txn().unwrap();
let indexer_config = IndexerConfig::default();
for ids in document_ids_to_delete {
let config = IndexDocumentsConfig::default();
let mut builder =
IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
.unwrap();
(builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
}
fn indexing_movies_in_three_batches(c: &mut Criterion) {
let mut group = c.benchmark_group("indexing");
group.sample_size(BENCHMARK_ITERATION);
@ -1112,17 +1099,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});
@ -1338,17 +1315,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
(index, document_ids_to_delete)
},
move |(index, document_ids_to_delete)| {
let mut wtxn = index.write_txn().unwrap();
for ids in document_ids_to_delete {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_documents(&ids);
builder.execute().unwrap();
}
wtxn.commit().unwrap();
index.prepare_for_closing().wait();
delete_documents_from_ids(index, document_ids_to_delete)
},
)
});

View File

@ -12,7 +12,7 @@ use milli::heed::EnvOpenOptions;
use milli::update::{
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
};
use milli::{Filter, Index, Object, RankingRule, TermsMatchingStrategy};
use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy};
use serde_json::Value;
pub struct Conf<'a> {
@ -78,11 +78,11 @@ pub fn base_setup(conf: &Conf) -> Index {
if let Some(criterion) = conf.criterion {
builder.reset_filterable_fields();
builder.reset_ranking_rules();
builder.reset_criteria();
builder.reset_stop_words();
let criterion = criterion.iter().map(|s| RankingRule::from_str(s).unwrap()).collect();
builder.set_ranking_rules(criterion);
let criterion = criterion.iter().map(|s| Criterion::from_str(s).unwrap()).collect();
builder.set_criteria(criterion);
}
(conf.configure)(&mut builder);

View File

@ -1,24 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -1,38 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@ -1,31 +0,0 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -18,11 +18,12 @@ derive_builder = "0.12.0"
dump = { path = "../dump" }
enum-iterator = "1.4.0"
file-store = { path = "../file-store" }
flate2 = "1.0.28"
log = "0.4.17"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
puffin = "0.16.0"
puffin = { version = "0.16.0", features = ["serialization"] }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@ -30,6 +31,7 @@ synchronoise = "1.0.1"
tempfile = "3.5.0"
thiserror = "1.0.40"
time = { version = "0.3.20", features = ["serde-well-known", "formatting", "parsing", "macros"] }
ureq = "2.9.1"
uuid = { version = "1.3.1", features = ["serde", "v4"] }
[dev-dependencies]

View File

@ -24,14 +24,13 @@ use std::fs::{self, File};
use std::io::BufWriter;
use dump::IndexMetadata;
use log::{debug, error, info};
use log::{debug, error, info, trace};
use meilisearch_types::error::Code;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, Filter, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@ -44,7 +43,7 @@ use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::utils::{self, swap_index_uid_in_task};
use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};
/// Represents a combination of tasks that can all be processed at the same time.
///
@ -105,12 +104,6 @@ pub(crate) enum IndexOperation {
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentDeletion {
index_uid: String,
// The vec associated with each document deletion tasks.
documents: Vec<Vec<String>>,
tasks: Vec<Task>,
},
IndexDocumentDeletionByFilter {
index_uid: String,
task: Task,
@ -162,7 +155,6 @@ impl Batch {
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
@ -227,7 +219,6 @@ impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
@ -243,9 +234,6 @@ impl fmt::Display for IndexOperation {
IndexOperation::DocumentOperation { .. } => {
f.write_str("IndexOperation::DocumentOperation")
}
IndexOperation::DocumentDeletion { .. } => {
f.write_str("IndexOperation::DocumentDeletion")
}
IndexOperation::IndexDocumentDeletionByFilter { .. } => {
f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
}
@ -348,18 +336,27 @@ impl IndexScheduler {
BatchKind::DocumentDeletion { deletion_ids } => {
let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;
let mut documents = Vec::new();
let mut operations = Vec::with_capacity(tasks.len());
let mut documents_counts = Vec::with_capacity(tasks.len());
for task in &tasks {
match task.kind {
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents.push(documents_ids.clone())
operations.push(DocumentOperation::Delete(documents_ids.clone()));
documents_counts.push(documents_ids.len() as u64);
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
op: IndexOperation::DocumentOperation {
index_uid,
primary_key: None,
method: IndexDocumentsMethod::ReplaceDocuments,
documents_counts,
operations,
tasks,
},
must_create_index,
}))
}
@ -825,6 +822,10 @@ impl IndexScheduler {
// 2. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_, mut t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
@ -845,6 +846,9 @@ impl IndexScheduler {
// 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file) = content_file {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
if status == Status::Enqueued {
let content_file = self.file_store.get_update(content_file)?;
@ -884,6 +888,9 @@ impl IndexScheduler {
// 3.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let (_id, doc) = ret?;
let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
@ -903,6 +910,9 @@ impl IndexScheduler {
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;
@ -923,6 +933,10 @@ impl IndexScheduler {
self.index_mapper.index(&rtxn, &index_uid)?
};
// the index operation can take a long time, so save this handle to make it available to the search for the duration of the tick
*self.currently_updating_index.write().unwrap() =
Some((index_uid.clone(), index.clone()));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
index_wtxn.commit()?;
@ -1191,7 +1205,7 @@ impl IndexScheduler {
index,
indexer_config,
config,
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
@ -1238,7 +1252,8 @@ impl IndexScheduler {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
// Uses Invariant: remove documents actually always returns Ok for the inner result
let count = user_result.unwrap();
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
@ -1249,23 +1264,11 @@ impl IndexScheduler {
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
}
}
@ -1280,31 +1283,13 @@ impl IndexScheduler {
milli::update::Settings::new(index_wtxn, index, indexer_config);
builder.reset_primary_key();
builder.execute(
|indexing_step| debug!("update: {:?}", indexing_step),
|indexing_step| trace!("update: {:?}", indexing_step),
|| must_stop_processing.clone().get(),
)?;
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
documents.iter().flatten().for_each(|id| {
builder.delete_external_id(id);
});
let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
for (task, documents) in tasks.iter_mut().zip(documents) {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids: documents.len(),
deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
});
}
Ok(tasks)
}
IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
let filter =
if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1314,7 +1299,13 @@ impl IndexScheduler {
} else {
unreachable!()
};
let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
let deleted_documents = delete_document_by_filter(
index_wtxn,
filter,
self.index_mapper.indexer_config(),
self.must_stop_processing.clone(),
index,
);
let original_filter = if let Some(Details::DocumentDeletionByFilter {
original_filter,
deleted_documents: _,
@ -1548,6 +1539,8 @@ impl IndexScheduler {
fn delete_document_by_filter<'a>(
wtxn: &mut RwTxn<'a, '_>,
filter: &serde_json::Value,
indexer_config: &IndexerConfig,
must_stop_processing: MustStopProcessing,
index: &'a Index,
) -> Result<u64> {
let filter = Filter::from_json(filter)?;
@ -1558,9 +1551,26 @@ fn delete_document_by_filter<'a>(
}
e => e.into(),
})?;
let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
delete_operation.delete_documents(&candidates);
delete_operation.execute().map(|result| result.deleted_documents)?
let config = IndexDocumentsConfig {
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
};
let mut builder = milli::update::IndexDocuments::new(
wtxn,
index,
indexer_config,
config,
|indexing_step| debug!("update: {:?}", indexing_step),
|| must_stop_processing.get(),
)?;
let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
builder = new_builder;
let _ = builder.execute()?;
count
} else {
0
})

View File

@ -108,6 +108,8 @@ pub enum Error {
TaskDeletionWithEmptyQuery,
#[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
TaskCancelationWithEmptyQuery,
#[error("Aborted task")]
AbortedTask,
#[error(transparent)]
Dump(#[from] dump::Error),
@ -175,6 +177,7 @@ impl Error {
| Error::TaskNotFound(_)
| Error::TaskDeletionWithEmptyQuery
| Error::TaskCancelationWithEmptyQuery
| Error::AbortedTask
| Error::Dump(_)
| Error::Heed(_)
| Error::Milli(_)
@ -236,6 +239,9 @@ impl ErrorCode for Error {
Error::TaskDatabaseUpdate(_) => Code::Internal,
Error::CreateBatch(_) => Code::Internal,
// This one should never be seen by the end user
Error::AbortedTask => Code::Internal,
#[cfg(test)]
Error::PlannedFailure => Code::Internal,
}

View File

@ -36,9 +36,11 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {
snapshots_path: _,
auth_path: _,
version_file_path: _,
webhook_url: _,
test_breakpoint_sdr: _,
planned_failures: _,
run_loop_iteration: _,
currently_updating_index: _,
} = scheduler;
let rtxn = env.read_txn().unwrap();

View File

@ -27,13 +27,14 @@ mod index_mapper;
mod insta_snapshot;
mod lru;
mod utils;
mod uuid_codec;
pub mod uuid_codec;
pub type Result<T> = std::result::Result<T, Error>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{self, BufReader, Read};
use std::ops::{Bound, RangeBounds};
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicBool;
@ -45,6 +46,8 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error;
pub use features::RoFeatures;
use file_store::FileStore;
use flate2::bufread::GzEncoder;
use flate2::Compression;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
@ -52,6 +55,7 @@ use meilisearch_types::heed::{self, Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use puffin::FrameView;
use roaring::RoaringBitmap;
@ -169,8 +173,8 @@ impl ProcessingTasks {
}
/// Set the processing tasks to an empty list
fn stop_processing(&mut self) {
self.processing = RoaringBitmap::new();
fn stop_processing(&mut self) -> RoaringBitmap {
std::mem::take(&mut self.processing)
}
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
@ -240,6 +244,7 @@ pub struct IndexSchedulerOptions {
pub snapshots_path: PathBuf,
/// The path to the folder containing the dumps.
pub dumps_path: PathBuf,
pub webhook_url: Option<String>,
/// The maximum size, in bytes, of the task index.
pub task_db_size: usize,
/// The size, in bytes, with which a meilisearch index is opened the first time of each meilisearch index.
@ -316,6 +321,9 @@ pub struct IndexScheduler {
/// the finished tasks automatically.
pub(crate) max_number_of_tasks: usize,
/// The webhook url we should send tasks to after processing every batches.
pub(crate) webhook_url: Option<String>,
/// A frame to output the indexation profiling files to disk.
pub(crate) puffin_frame: Arc<puffin::GlobalFrameView>,
@ -331,6 +339,10 @@ pub struct IndexScheduler {
/// The path to the version file of Meilisearch.
pub(crate) version_file_path: PathBuf,
/// A few types of long running batches of tasks that act on a single index set this field
/// so that a handle to the index is available from other threads (search) in an optimized manner.
currently_updating_index: Arc<RwLock<Option<(String, Index)>>>,
// ================= test
// The next entry is dedicated to the tests.
/// Provide a way to set a breakpoint in multiple part of the scheduler.
@ -374,6 +386,8 @@ impl IndexScheduler {
dumps_path: self.dumps_path.clone(),
auth_path: self.auth_path.clone(),
version_file_path: self.version_file_path.clone(),
webhook_url: self.webhook_url.clone(),
currently_updating_index: self.currently_updating_index.clone(),
#[cfg(test)]
test_breakpoint_sdr: self.test_breakpoint_sdr.clone(),
#[cfg(test)]
@ -470,6 +484,8 @@ impl IndexScheduler {
snapshots_path: options.snapshots_path,
auth_path: options.auth_path,
version_file_path: options.version_file_path,
webhook_url: options.webhook_url,
currently_updating_index: Arc::new(RwLock::new(None)),
#[cfg(test)]
test_breakpoint_sdr,
@ -652,6 +668,13 @@ impl IndexScheduler {
/// If you need to fetch information from or perform an action on all indexes,
/// see the `try_for_each_index` function.
pub fn index(&self, name: &str) -> Result<Index> {
if let Some((current_name, current_index)) =
self.currently_updating_index.read().unwrap().as_ref()
{
if current_name == name {
return Ok(current_index.clone());
}
}
let rtxn = self.env.read_txn()?;
self.index_mapper.index(&rtxn, name)
}
@ -1133,6 +1156,9 @@ impl IndexScheduler {
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
};
// Reset the currently updating index to relinquish the index handle
*self.currently_updating_index.write().unwrap() = None;
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
@ -1167,7 +1193,8 @@ impl IndexScheduler {
// If we have an abortion error we must stop the tick here and re-schedule tasks.
Err(Error::Milli(milli::Error::InternalError(
milli::InternalError::AbortedIndexation,
))) => {
)))
| Err(Error::AbortedTask) => {
#[cfg(test)]
self.breakpoint(Breakpoint::AbortedIndexation);
wtxn.abort().map_err(Error::HeedTransaction)?;
@ -1223,19 +1250,92 @@ impl IndexScheduler {
}
}
self.processing_tasks.write().unwrap().stop_processing();
let processed = self.processing_tasks.write().unwrap().stop_processing();
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::CommittingWtxn)?;
wtxn.commit().map_err(Error::HeedTransaction)?;
// We shouldn't crash the tick function if we can't send data to the webhook.
let _ = self.notify_webhook(&processed);
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks))
}
/// Once the tasks changes have been commited we must send all the tasks that were updated to our webhook if there is one.
fn notify_webhook(&self, updated: &RoaringBitmap) -> Result<()> {
if let Some(ref url) = self.webhook_url {
struct TaskReader<'a, 'b> {
rtxn: &'a RoTxn<'a>,
index_scheduler: &'a IndexScheduler,
tasks: &'b mut roaring::bitmap::Iter<'b>,
buffer: Vec<u8>,
written: usize,
}
impl<'a, 'b> Read for TaskReader<'a, 'b> {
fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result<usize> {
if self.buffer.is_empty() {
match self.tasks.next() {
None => return Ok(0),
Some(task_id) => {
let task = self
.index_scheduler
.get_task(self.rtxn, task_id)
.map_err(|err| io::Error::new(io::ErrorKind::Other, err))?
.ok_or_else(|| {
io::Error::new(
io::ErrorKind::Other,
Error::CorruptedTaskQueue,
)
})?;
serde_json::to_writer(
&mut self.buffer,
&TaskView::from_task(&task),
)?;
self.buffer.push(b'\n');
}
}
}
let mut to_write = &self.buffer[self.written..];
let wrote = io::copy(&mut to_write, &mut buf)?;
self.written += wrote as usize;
// we wrote everything and must refresh our buffer on the next call
if self.written == self.buffer.len() {
self.written = 0;
self.buffer.clear();
}
Ok(wrote as usize)
}
}
let rtxn = self.env.read_txn()?;
let task_reader = TaskReader {
rtxn: &rtxn,
index_scheduler: self,
tasks: &mut updated.into_iter(),
buffer: Vec::with_capacity(50), // on average a task is around ~100 bytes
written: 0,
};
let reader = GzEncoder::new(BufReader::new(task_reader), Compression::default());
if let Err(e) = ureq::post(url).set("Content-Encoding", "gzip").send(reader) {
log::error!("While sending data to the webhook: {e}");
}
}
Ok(())
}
/// Register a task to cleanup the task queue if needed
fn cleanup_task_queue(&self) -> Result<()> {
let rtxn = self.env.read_txn().map_err(Error::HeedTransaction)?;
@ -1615,6 +1715,7 @@ mod tests {
indexes_path: tempdir.path().join("indexes"),
snapshots_path: tempdir.path().join("snapshots"),
dumps_path: tempdir.path().join("dumps"),
webhook_url: None,
task_db_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
index_base_map_size: 1000 * 1000, // 1 MB, we don't use MiB on purpose.
enable_mdb_writemap: false,
@ -4323,4 +4424,26 @@ mod tests {
}
"###);
}
#[test]
fn cancel_processing_dump() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
let dump_cancellation = KindWithContent::TaskCancelation {
query: "cancel dump".to_owned(),
tasks: RoaringBitmap::from_iter([0]),
};
let _ = index_scheduler.register(dump_creation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
let _ = index_scheduler.register(dump_cancellation).unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
}
}

View File

@ -0,0 +1,35 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
canceled [0,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
1 [0,]
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -0,0 +1,38 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[0,]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"taskCancelation" [1,]
"dumpCreation" [0,]
----------------------------------------------------------------------
### Index Tasks:
----------------------------------------------------------------------
### Index Mapper:
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -50,6 +50,7 @@ hebrew = ["milli/hebrew"]
japanese = ["milli/japanese"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization
greek = ["milli/greek"]
# allow khmer specialized tokenization
khmer = ["milli/khmer"]

View File

@ -235,7 +235,6 @@ InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchBoostingFilter , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPostTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHighlightPreTag , InvalidRequest , BAD_REQUEST ;
InvalidSearchHitsPerPage , InvalidRequest , BAD_REQUEST ;
@ -325,14 +324,12 @@ impl ErrorCode for milli::Error {
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
UserError::MaxDatabaseSizeReached => Code::DatabaseSizeLimitReached,
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
UserError::InvalidBoostingFilter(_) => Code::InvalidSearchBoostingFilter,
UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {

View File

@ -9,6 +9,7 @@ pub mod index_uid_pattern;
pub mod keys;
pub mod settings;
pub mod star_or;
pub mod task_view;
pub mod tasks;
pub mod versioning;
pub use milli::{heed, Index};

View File

@ -9,7 +9,7 @@ use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::update::Setting;
use milli::{Index, RankingRule, RankingRuleError, DEFAULT_VALUES_PER_FACET};
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
use crate::deserr::DeserrJsonError;
@ -117,10 +117,10 @@ pub struct PaginationSettings {
pub max_total_hits: Setting<usize>,
}
impl MergeWithError<milli::RankingRuleError> for DeserrJsonError<InvalidSettingsRankingRules> {
impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRankingRules> {
fn merge(
_self_: Option<Self>,
other: milli::RankingRuleError,
other: milli::CriterionError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
Self::error::<Infallible>(
@ -344,9 +344,9 @@ pub fn apply_settings_to_builder(
match settings.ranking_rules {
Setting::Set(ref criteria) => {
builder.set_ranking_rules(criteria.iter().map(|c| c.clone().into()).collect())
builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
}
Setting::Reset => builder.reset_ranking_rules(),
Setting::Reset => builder.reset_criteria(),
Setting::NotSet => (),
}
@ -578,13 +578,11 @@ pub fn settings(
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(&String) = FromStr::from_str -> RankingRuleError)]
#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute are considered better than if they were at the back.
Words,
/// Sorted by documents matching the given filter and then documents not matching it.
Boost(String),
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
@ -607,7 +605,7 @@ impl Serialize for RankingRuleView {
where
S: Serializer,
{
serializer.serialize_str(&format!("{}", RankingRule::from(self.clone())))
serializer.serialize_str(&format!("{}", Criterion::from(self.clone())))
}
}
impl<'de> Deserialize<'de> for RankingRuleView {
@ -625,7 +623,7 @@ impl<'de> Deserialize<'de> for RankingRuleView {
where
E: serde::de::Error,
{
let criterion = RankingRule::from_str(v).map_err(|_| {
let criterion = Criterion::from_str(v).map_err(|_| {
E::invalid_value(serde::de::Unexpected::Str(v), &"a valid ranking rule")
})?;
Ok(RankingRuleView::from(criterion))
@ -635,44 +633,42 @@ impl<'de> Deserialize<'de> for RankingRuleView {
}
}
impl FromStr for RankingRuleView {
type Err = <RankingRule as FromStr>::Err;
type Err = <Criterion as FromStr>::Err;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(RankingRuleView::from(RankingRule::from_str(s)?))
Ok(RankingRuleView::from(Criterion::from_str(s)?))
}
}
impl fmt::Display for RankingRuleView {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt::Display::fmt(&RankingRule::from(self.clone()), f)
fmt::Display::fmt(&Criterion::from(self.clone()), f)
}
}
impl From<RankingRule> for RankingRuleView {
fn from(value: RankingRule) -> Self {
impl From<Criterion> for RankingRuleView {
fn from(value: Criterion) -> Self {
match value {
RankingRule::Words => RankingRuleView::Words,
RankingRule::FilterBoosting(filter) => RankingRuleView::Boost(filter),
RankingRule::Typo => RankingRuleView::Typo,
RankingRule::Proximity => RankingRuleView::Proximity,
RankingRule::Attribute => RankingRuleView::Attribute,
RankingRule::Sort => RankingRuleView::Sort,
RankingRule::Exactness => RankingRuleView::Exactness,
RankingRule::Asc(x) => RankingRuleView::Asc(x),
RankingRule::Desc(x) => RankingRuleView::Desc(x),
Criterion::Words => RankingRuleView::Words,
Criterion::Typo => RankingRuleView::Typo,
Criterion::Proximity => RankingRuleView::Proximity,
Criterion::Attribute => RankingRuleView::Attribute,
Criterion::Sort => RankingRuleView::Sort,
Criterion::Exactness => RankingRuleView::Exactness,
Criterion::Asc(x) => RankingRuleView::Asc(x),
Criterion::Desc(x) => RankingRuleView::Desc(x),
}
}
}
impl From<RankingRuleView> for RankingRule {
impl From<RankingRuleView> for Criterion {
fn from(value: RankingRuleView) -> Self {
match value {
RankingRuleView::Words => RankingRule::Words,
RankingRuleView::Boost(filter) => RankingRule::FilterBoosting(filter),
RankingRuleView::Typo => RankingRule::Typo,
RankingRuleView::Proximity => RankingRule::Proximity,
RankingRuleView::Attribute => RankingRule::Attribute,
RankingRuleView::Sort => RankingRule::Sort,
RankingRuleView::Exactness => RankingRule::Exactness,
RankingRuleView::Asc(x) => RankingRule::Asc(x),
RankingRuleView::Desc(x) => RankingRule::Desc(x),
RankingRuleView::Words => Criterion::Words,
RankingRuleView::Typo => Criterion::Typo,
RankingRuleView::Proximity => Criterion::Proximity,
RankingRuleView::Attribute => Criterion::Attribute,
RankingRuleView::Sort => Criterion::Sort,
RankingRuleView::Exactness => Criterion::Exactness,
RankingRuleView::Asc(x) => Criterion::Asc(x),
RankingRuleView::Desc(x) => Criterion::Desc(x),
}
}
}
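
To make the revert above concrete, here is a minimal sketch (not part of the diff) of the round trip between the user-facing RankingRuleView and milli's Criterion, assuming both types are in scope as they are in settings.rs:

fn ranking_rule_round_trip() {
    use std::str::FromStr;
    // "price:asc" parses into a custom ascending criterion...
    let criterion = Criterion::from_str("price:asc").unwrap();
    // ...which converts into the user-facing view and back without loss.
    let view = RankingRuleView::from(criterion.clone());
    assert_eq!(view.to_string(), "price:asc"); // Display delegates to Criterion
    assert_eq!(Criterion::from(view), criterion);
}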

View File

@ -0,0 +1,139 @@
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::error::ResponseError;
use crate::settings::{Settings, Unchecked};
use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId};
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}

View File

@ -104,6 +104,7 @@ walkdir = "2.3.3"
yaup = "0.2.1"
serde_urlencoded = "0.7.1"
termcolor = "1.2.0"
url = { version = "2.5.0", features = ["serde"] }
[dev-dependencies]
actix-rt = "2.8.0"
@ -150,6 +151,7 @@ hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.11/build.zip"

View File

@ -288,6 +288,7 @@ impl From<Opt> for Infos {
http_addr,
master_key: _,
env,
task_webhook_url: _,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@ -629,7 +630,6 @@ impl SearchAggregator {
show_ranking_score,
show_ranking_score_details,
filter,
boosting_filter,
sort,
facets: _,
highlight_pre_tag,
@ -1003,7 +1003,6 @@ impl MultiSearchAggregator {
show_ranking_score_details: _,
show_matches_position: _,
filter: _,
boosting_filter: _,
sort: _,
facets: _,
highlight_pre_tag: _,

View File

@ -228,6 +228,7 @@ fn open_or_create_database_unchecked(
indexes_path: opt.db_path.join("indexes"),
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
webhook_url: opt.task_webhook_url.as_ref().map(|url| url.to_string()),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
enable_mdb_writemap: opt.experimental_reduce_indexing_memory_usage,
@ -362,7 +363,7 @@ fn import_dump(
update_method: IndexDocumentsMethod::ReplaceDocuments,
..Default::default()
},
|indexing_step| log::debug!("update: {:?}", indexing_step),
|indexing_step| log::trace!("update: {:?}", indexing_step),
|| false,
)?;
@ -397,6 +398,7 @@ pub fn configure_data(
.app_data(web::Data::from(analytics))
.app_data(
web::JsonConfig::default()
.limit(http_payload_size_limit)
.content_type(|mime| mime == mime::APPLICATION_JSON)
.error_handler(|err, req: &HttpRequest| match err {
JsonPayloadError::ContentType => match req.headers().get(CONTENT_TYPE) {

View File

@ -21,6 +21,7 @@ use rustls::RootCertStore;
use rustls_pemfile::{certs, pkcs8_private_keys, rsa_private_keys};
use serde::{Deserialize, Serialize};
use sysinfo::{RefreshKind, System, SystemExt};
use url::Url;
const POSSIBLE_ENV: [&str; 2] = ["development", "production"];
@ -28,6 +29,7 @@ const MEILI_DB_PATH: &str = "MEILI_DB_PATH";
const MEILI_HTTP_ADDR: &str = "MEILI_HTTP_ADDR";
const MEILI_MASTER_KEY: &str = "MEILI_MASTER_KEY";
const MEILI_ENV: &str = "MEILI_ENV";
const MEILI_TASK_WEBHOOK_URL: &str = "MEILI_TASK_WEBHOOK_URL";
#[cfg(feature = "analytics")]
const MEILI_NO_ANALYTICS: &str = "MEILI_NO_ANALYTICS";
const MEILI_HTTP_PAYLOAD_SIZE_LIMIT: &str = "MEILI_HTTP_PAYLOAD_SIZE_LIMIT";
@ -154,6 +156,10 @@ pub struct Opt {
#[serde(default = "default_env")]
pub env: String,
/// Called whenever a task finishes so a third party can be notified.
#[clap(long, env = MEILI_TASK_WEBHOOK_URL)]
pub task_webhook_url: Option<Url>,
/// Deactivates Meilisearch's built-in telemetry when provided.
///
/// Meilisearch automatically collects data from all instances that do not opt out using this flag.
@ -368,6 +374,7 @@ impl Opt {
http_addr,
master_key,
env,
task_webhook_url,
max_index_size: _,
max_task_db_size: _,
http_payload_size_limit,
@ -401,6 +408,10 @@ impl Opt {
export_to_env_if_not_present(MEILI_MASTER_KEY, master_key);
}
export_to_env_if_not_present(MEILI_ENV, env);
if let Some(task_webhook_url) = task_webhook_url {
export_to_env_if_not_present(MEILI_TASK_WEBHOOK_URL, task_webhook_url.to_string());
}
#[cfg(feature = "analytics")]
{
export_to_env_if_not_present(MEILI_NO_ANALYTICS, no_analytics.to_string());
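
For reference, the webhook can be enabled through the MEILI_TASK_WEBHOOK_URL environment variable handled above or programmatically. A minimal sketch mirroring the webhook integration test further down, assuming the Server::new_with_options and default_settings test helpers and an async test context:

let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
    // Every finished task will be POSTed to this URL as newline-delimited JSON.
    task_webhook_url: Some(Url::parse("http://localhost:9090/").unwrap()),
    ..default_settings(db_path.path())
})
.await
.unwrap();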

View File

@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let internal_id = index
.external_documents_ids(&txn)?
.get(doc_id.as_bytes())
.external_documents_ids()
.get(&txn, doc_id)?
.ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;
let document = index

View File

@ -111,7 +111,6 @@ impl From<FacetSearchQuery> for SearchQuery {
show_ranking_score: false,
show_ranking_score_details: false,
filter,
boosting_filter: None,
sort: None,
facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),

View File

@ -54,8 +54,6 @@ pub struct SearchQueryGet {
attributes_to_highlight: Option<CS<String>>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchFilter>)]
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchBoostingFilter>)]
boosting_filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
@ -88,14 +86,6 @@ impl From<SearchQueryGet> for SearchQuery {
None => None,
};
let boosting_filter = match other.boosting_filter {
Some(f) => match serde_json::from_str(&f) {
Ok(v) => Some(v),
_ => Some(Value::String(f)),
},
None => None,
};
Self {
q: other.q,
vector: other.vector.map(CS::into_inner),
@ -108,7 +98,6 @@ impl From<SearchQueryGet> for SearchQuery {
crop_length: other.crop_length.0,
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter,
boosting_filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,

View File

@ -8,11 +8,9 @@ use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{InvalidTaskDateError, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::{Settings, Unchecked};
use meilisearch_types::star_or::{OptionStarOr, OptionStarOrList};
use meilisearch_types::tasks::{
serialize_duration, Details, IndexSwap, Kind, KindWithContent, Status, Task,
};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status};
use serde::Serialize;
use serde_json::json;
use time::format_description::well_known::Rfc3339;
@ -37,140 +35,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/cancel").route(web::post().to(SeqHandler(cancel_tasks))))
.service(web::resource("/{task_id}").route(web::get().to(SeqHandler(get_task))));
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskView {
pub uid: TaskId,
#[serde(default)]
pub index_uid: Option<String>,
pub status: Status,
#[serde(rename = "type")]
pub kind: Kind,
pub canceled_by: Option<TaskId>,
#[serde(skip_serializing_if = "Option::is_none")]
pub details: Option<DetailsView>,
pub error: Option<ResponseError>,
#[serde(serialize_with = "serialize_duration", default)]
pub duration: Option<Duration>,
#[serde(with = "time::serde::rfc3339")]
pub enqueued_at: OffsetDateTime,
#[serde(with = "time::serde::rfc3339::option", default)]
pub started_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339::option", default)]
pub finished_at: Option<OffsetDateTime>,
}
impl TaskView {
pub fn from_task(task: &Task) -> TaskView {
TaskView {
uid: task.uid,
index_uid: task.index_uid().map(ToOwned::to_owned),
status: task.status,
kind: task.kind.as_kind(),
canceled_by: task.canceled_by,
details: task.details.clone().map(DetailsView::from),
error: task.error.clone(),
duration: task.started_at.zip(task.finished_at).map(|(start, end)| end - start),
enqueued_at: task.enqueued_at,
started_at: task.started_at,
finished_at: task.finished_at,
}
}
}
#[derive(Default, Debug, PartialEq, Eq, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub received_documents: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexed_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub primary_key: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub provided_ids: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_documents: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub matched_tasks: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub canceled_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub deleted_tasks: Option<Option<u64>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub original_filter: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub dump_uid: Option<Option<String>>,
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(flatten)]
pub settings: Option<Box<Settings<Unchecked>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub swaps: Option<Vec<IndexSwap>>,
}
impl From<Details> for DetailsView {
fn from(details: Details) -> Self {
match details {
Details::DocumentAdditionOrUpdate { received_documents, indexed_documents } => {
DetailsView {
received_documents: Some(received_documents),
indexed_documents: Some(indexed_documents),
..DetailsView::default()
}
}
Details::SettingsUpdate { settings } => {
DetailsView { settings: Some(settings), ..DetailsView::default() }
}
Details::IndexInfo { primary_key } => {
DetailsView { primary_key: Some(primary_key), ..DetailsView::default() }
}
Details::DocumentDeletion {
provided_ids: received_document_ids,
deleted_documents,
} => DetailsView {
provided_ids: Some(received_document_ids),
deleted_documents: Some(deleted_documents),
original_filter: Some(None),
..DetailsView::default()
},
Details::DocumentDeletionByFilter { original_filter, deleted_documents } => {
DetailsView {
provided_ids: Some(0),
original_filter: Some(Some(original_filter)),
deleted_documents: Some(deleted_documents),
..DetailsView::default()
}
}
Details::ClearAll { deleted_documents } => {
DetailsView { deleted_documents: Some(deleted_documents), ..DetailsView::default() }
}
Details::TaskCancelation { matched_tasks, canceled_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
canceled_tasks: Some(canceled_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::TaskDeletion { matched_tasks, deleted_tasks, original_filter } => {
DetailsView {
matched_tasks: Some(matched_tasks),
deleted_tasks: Some(deleted_tasks),
original_filter: Some(Some(original_filter)),
..DetailsView::default()
}
}
Details::Dump { dump_uid } => {
DetailsView { dump_uid: Some(dump_uid), ..DetailsView::default() }
}
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
}
}
}
#[derive(Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {

View File

@ -71,8 +71,6 @@ pub struct SearchQuery {
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchBoostingFilter>)]
pub boosting_filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
@ -132,8 +130,6 @@ pub struct SearchQueryWithIndex {
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchBoostingFilter>)]
pub boosting_filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
@ -168,7 +164,6 @@ impl SearchQueryWithIndex {
show_ranking_score_details,
show_matches_position,
filter,
boosting_filter,
sort,
facets,
highlight_pre_tag,
@ -194,7 +189,6 @@ impl SearchQueryWithIndex {
show_ranking_score_details,
show_matches_position,
filter,
boosting_filter,
sort,
facets,
highlight_pre_tag,
@ -403,14 +397,8 @@ fn prepare_search<'t>(
search.limit(limit);
if let Some(ref filter) = query.filter {
if let Some(filter) = parse_filter(filter)? {
search.filter(filter);
}
}
if let Some(ref boosting_filter) = query.boosting_filter {
if let Some(boosting_filter) = parse_filter(boosting_filter)? {
search.boosting_filter(boosting_filter);
if let Some(facets) = parse_filter(filter)? {
search.filter(facets);
}
}

View File

@ -5,9 +5,11 @@ pub mod service;
use std::fmt::{self, Display};
#[allow(unused)]
pub use index::{GetAllDocumentsOptions, GetDocumentOptions};
use meili_snap::json_string;
use serde::{Deserialize, Serialize};
#[allow(unused)]
pub use server::{default_settings, Server};
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]

View File

@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
"canceledBy": null,
"details": {
"providedIds": 0,
"deletedDocuments": 4,
"deletedDocuments": 2,
"originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
},
"error": null,

View File

@ -6,21 +6,109 @@ use crate::json;
pub(self) static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{"productId": 1, "shopId": 1},
{"productId": 2, "shopId": 1},
{"productId": 3, "shopId": 2},
{"productId": 4, "shopId": 2},
{"productId": 5, "shopId": 3},
{"productId": 6, "shopId": 3},
{"productId": 7, "shopId": 4},
{"productId": 8, "shopId": 4},
{"productId": 9, "shopId": 5},
{"productId": 10, "shopId": 5}
{
"id": 1,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Brown"
},
{
"id": 2,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Black"
},
{
"id": 3,
"description": "Leather Jacket",
"brand": "Lee Jeans",
"product_id": "123456",
"color": "Blue"
},
{
"id": 4,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Red"
},
{
"id": 5,
"description": "T-Shirt",
"brand": "Nike",
"product_id": "789012",
"color": "Blue"
},
{
"id": 6,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "Black"
},
{
"id": 7,
"description": "Running Shoes",
"brand": "Adidas",
"product_id": "456789",
"color": "White"
},
{
"id": 8,
"description": "Hoodie",
"brand": "Puma",
"product_id": "987654",
"color": "Gray"
},
{
"id": 9,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Green"
},
{
"id": 10,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Red"
},
{
"id": 11,
"description": "Sweater",
"brand": "Gap",
"product_id": "234567",
"color": "Blue"
},
{
"id": 12,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Indigo"
},
{
"id": 13,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Black"
},
{
"id": 14,
"description": "Jeans",
"brand": "Levi's",
"product_id": "345678",
"color": "Stone Wash"
}
])
});
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "productId";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "shopId";
pub(self) static DOCUMENT_PRIMARY_KEY: &str = "id";
pub(self) static DOCUMENT_DISTINCT_KEY: &str = "product_id";
/// testing: https://github.com/meilisearch/meilisearch/issues/4078
#[actix_rt::test]
@ -33,31 +121,121 @@ async fn distinct_search_with_offset_no_ranking() {
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(Value(response): Value) -> Vec<i64> {
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_i64().unwrap()).collect::<Vec<_>>()
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"limit": 2, "offset": 0})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 0, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[1, 2]");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["estimatedTotalHits"] , @"11");
let (response, code) = index.search_post(json!({"limit": 2, "offset": 2})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 2, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @"[3, 4]");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["estimatedTotalHits"], @"10");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 4})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 4, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 5, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"1");
snapshot!(format!("{:?}", hits), @"[5]");
snapshot!(format!("{:?}", hits), @r#"["345678"]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"limit": 10, "offset": 5})).await;
let hits = get_hits(response);
let (response, code) = index.search_post(json!({"offset": 6, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
let (response, code) = index.search_post(json!({"offset": 7, "limit": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["estimatedTotalHits"], @"6");
}
/// testing: https://github.com/meilisearch/meilisearch/issues/4130
#[actix_rt::test]
async fn distinct_search_with_pagination_no_ranking() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await;
index.update_distinct_attribute(json!(DOCUMENT_DISTINCT_KEY)).await;
index.wait_task(1).await;
fn get_hits(response: &Value) -> Vec<&str> {
let hits_array = response["hits"].as_array().unwrap();
hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::<Vec<_>>()
}
let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"0");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 1, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["123456", "789012"]"#);
snapshot!(response["page"], @"1");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["456789", "987654"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 3, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"2");
snapshot!(format!("{:?}", hits), @r#"["234567", "345678"]"#);
snapshot!(response["page"], @"3");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 4, "hitsPerPage": 2})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"0");
snapshot!(format!("{:?}", hits), @r#"[]"#);
snapshot!(response["page"], @"4");
snapshot!(response["totalPages"], @"3");
snapshot!(response["totalHits"], @"6");
let (response, code) = index.search_post(json!({"page": 2, "hitsPerPage": 3})).await;
let hits = get_hits(&response);
snapshot!(code, @"200 OK");
snapshot!(hits.len(), @"3");
snapshot!(format!("{:?}", hits), @r#"["987654", "234567", "345678"]"#);
snapshot!(response["page"], @"2");
snapshot!(response["totalPages"], @"2");
snapshot!(response["totalHits"], @"6");
}

View File

@ -1,4 +1,5 @@
mod errors;
mod webhook;
use meili_snap::insta::assert_json_snapshot;
use time::format_description::well_known::Rfc3339;

View File

@ -0,0 +1,123 @@
//! To test the webhook, we need to spawn a new server with a URL listening for
//! POST requests. The webhook handle starts a server and forwards all the
//! received requests into a channel for you to handle.
use std::sync::Arc;
use actix_http::body::MessageBody;
use actix_web::dev::{ServiceFactory, ServiceResponse};
use actix_web::web::{Bytes, Data};
use actix_web::{post, App, HttpResponse, HttpServer};
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tokio::sync::mpsc;
use url::Url;
use crate::common::{default_settings, Server};
use crate::json;
#[post("/")]
async fn forward_body(sender: Data<mpsc::UnboundedSender<Vec<u8>>>, body: Bytes) -> HttpResponse {
let body = body.to_vec();
sender.send(body).unwrap();
HttpResponse::Ok().into()
}
fn create_app(
sender: Arc<mpsc::UnboundedSender<Vec<u8>>>,
) -> actix_web::App<
impl ServiceFactory<
actix_web::dev::ServiceRequest,
Config = (),
Response = ServiceResponse<impl MessageBody>,
Error = actix_web::Error,
InitError = (),
>,
> {
App::new().service(forward_body).app_data(Data::from(sender))
}
struct WebhookHandle {
pub server_handle: tokio::task::JoinHandle<Result<(), std::io::Error>>,
pub url: String,
pub receiver: mpsc::UnboundedReceiver<Vec<u8>>,
}
async fn create_webhook_server() -> WebhookHandle {
let mut log_builder = env_logger::Builder::new();
log_builder.parse_filters("info");
log_builder.init();
let (sender, receiver) = mpsc::unbounded_channel();
let sender = Arc::new(sender);
// By listening on port 0, the system will give us any available port.
let server =
HttpServer::new(move || create_app(sender.clone())).bind(("127.0.0.1", 0)).unwrap();
let (ip, scheme) = server.addrs_with_scheme()[0];
let url = format!("{scheme}://{ip}/");
let server_handle = tokio::spawn(server.run());
WebhookHandle { server_handle, url, receiver }
}
#[actix_web::test]
async fn test_basic_webhook() {
let WebhookHandle { server_handle, url, mut receiver } = create_webhook_server().await;
let db_path = tempfile::tempdir().unwrap();
let server = Server::new_with_options(Opt {
task_webhook_url: Some(Url::parse(&url).unwrap()),
..default_settings(db_path.path())
})
.await
.unwrap();
let index = server.index("tamo");
// May be flaky: we're relying on the fact that while the first document addition is processed, the other
// operations will be received and batched together. If that doesn't happen it's not a problem:
// the rest of the test won't assume anything about the number of tasks per batch.
for i in 0..5 {
let (_, _status) = index.add_documents(json!({ "id": i, "doggo": "bone" }), None).await;
}
let mut nb_tasks = 0;
while let Some(payload) = receiver.recv().await {
let payload = String::from_utf8(payload).unwrap();
let jsonl = payload.split('\n');
for json in jsonl {
if json.is_empty() {
break; // we reached EOF
}
nb_tasks += 1;
let json: serde_json::Value = serde_json::from_str(json).unwrap();
snapshot!(
json_string!(json, { ".uid" => "[uid]", ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": "[uid]",
"indexUid": "tamo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
if nb_tasks == 5 {
break;
}
}
assert!(nb_tasks == 5, "We should have received the 5 tasks but only received {nb_tasks}");
server_handle.abort();
}

meilitool/Cargo.toml (new file, 19 lines)
View File

@ -0,0 +1,19 @@
[package]
name = "meilitool"
description = "A CLI to edit a Meilisearch database from the command line"
version.workspace = true
authors.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.75"
clap = { version = "4.2.1", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
time = { version = "0.3.30", features = ["formatting"] }
uuid = { version = "1.5.0", features = ["v4"], default-features = false }

meilitool/src/main.rs (new file, 312 lines)
View File

@ -0,0 +1,312 @@
use std::fs::{read_dir, read_to_string, remove_file, File};
use std::io::BufWriter;
use std::path::PathBuf;
use anyhow::Context;
use clap::{Parser, Subcommand};
use dump::{DumpWriter, IndexMetadata};
use file_store::FileStore;
use meilisearch_auth::AuthController;
use meilisearch_types::heed::types::{OwnedType, SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, PolyDatabase, RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::{obkv_to_json, BEU32};
use meilisearch_types::tasks::{Status, Task};
use meilisearch_types::versioning::check_version_file;
use meilisearch_types::Index;
use time::macros::format_description;
use time::OffsetDateTime;
use uuid_codec::UuidCodec;
mod uuid_codec;
#[derive(Parser)]
#[command(author, version, about, long_about = None)]
struct Cli {
/// The database path where Meilisearch is running.
#[arg(long, default_value = "data.ms/")]
db_path: PathBuf,
#[command(subcommand)]
command: Command,
}
#[derive(Subcommand)]
enum Command {
/// Clears the task queue and makes it empty.
///
/// This command can be safely executed even if Meilisearch is running and processing tasks.
/// Once the task queue is empty you can restart Meilisearch and no tasks should be visible anymore,
/// not even the ones that were processing. However, it is quite possible that the previously
/// processing tasks show up in the queue again with an associated internal error message.
ClearTaskQueue,
/// Exports a dump from the Meilisearch database.
///
/// Make sure to run this command when Meilisearch is not running, or running but not processing tasks.
/// If tasks are being processed while a dump is being exported, there is a chance the dump will be
/// malformed, with missing tasks.
///
/// TODO Verify this claim or make sure it cannot happen and we can export dumps
/// without caring about killing Meilisearch first!
ExportADump {
/// The directory in which the dump will be created.
#[arg(long, default_value = "dumps/")]
dump_dir: PathBuf,
/// Skip dumping the enqueued or processing tasks.
///
/// Can be useful when there are a lot of them and keeping them is not particularly useful.
/// Note that only the enqueued tasks take up space, so skipping
/// the processed ones does not save much.
#[arg(long)]
skip_enqueued_tasks: bool,
},
}
fn main() -> anyhow::Result<()> {
let Cli { db_path, command } = Cli::parse();
check_version_file(&db_path).context("While checking the version file")?;
match command {
Command::ClearTaskQueue => clear_task_queue(db_path),
Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
}
}
}
/// Clears the task queue located at `db_path`.
fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
let path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&path)
.with_context(|| format!("While trying to open {:?}", path.display()))?;
eprintln!("Deleting tasks from the database...");
let mut wtxn = env.write_txn()?;
let all_tasks = try_opening_poly_database(&env, &wtxn, "all-tasks")?;
let total = all_tasks.len(&wtxn)?;
let status = try_opening_poly_database(&env, &wtxn, "status")?;
let kind = try_opening_poly_database(&env, &wtxn, "kind")?;
let index_tasks = try_opening_poly_database(&env, &wtxn, "index-tasks")?;
let canceled_by = try_opening_poly_database(&env, &wtxn, "canceled_by")?;
let enqueued_at = try_opening_poly_database(&env, &wtxn, "enqueued-at")?;
let started_at = try_opening_poly_database(&env, &wtxn, "started-at")?;
let finished_at = try_opening_poly_database(&env, &wtxn, "finished-at")?;
try_clearing_poly_database(&mut wtxn, all_tasks, "all-tasks")?;
try_clearing_poly_database(&mut wtxn, status, "status")?;
try_clearing_poly_database(&mut wtxn, kind, "kind")?;
try_clearing_poly_database(&mut wtxn, index_tasks, "index-tasks")?;
try_clearing_poly_database(&mut wtxn, canceled_by, "canceled_by")?;
try_clearing_poly_database(&mut wtxn, enqueued_at, "enqueued-at")?;
try_clearing_poly_database(&mut wtxn, started_at, "started-at")?;
try_clearing_poly_database(&mut wtxn, finished_at, "finished-at")?;
wtxn.commit().context("While committing the transaction")?;
eprintln!("Successfully deleted {total} tasks from the tasks database!");
eprintln!("Deleting the content files from disk...");
let mut count = 0usize;
let update_files = db_path.join("update_files");
let entries = read_dir(&update_files).with_context(|| {
format!("While trying to read the content of {:?}", update_files.display())
})?;
for result in entries {
match result {
Ok(ent) => match remove_file(ent.path()) {
Ok(_) => count += 1,
Err(e) => eprintln!("Error while deleting {:?}: {}", ent.path().display(), e),
},
Err(e) => {
eprintln!("Error while reading a file in {:?}: {}", update_files.display(), e)
}
}
}
eprintln!("Sucessfully deleted {count} content files from disk!");
Ok(())
}
fn try_opening_database<KC: 'static, DC: 'static>(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<Database<KC, DC>> {
env.open_database(rtxn, Some(db_name))
.with_context(|| format!("While opening the {db_name:?} database"))?
.with_context(|| format!("Missing the {db_name:?} database"))
}
fn try_opening_poly_database(
env: &Env,
rtxn: &RoTxn,
db_name: &str,
) -> anyhow::Result<PolyDatabase> {
env.open_poly_database(rtxn, Some(db_name))
.with_context(|| format!("While opening the {db_name:?} poly database"))?
.with_context(|| format!("Missing the {db_name:?} poly database"))
}
fn try_clearing_poly_database(
wtxn: &mut RwTxn,
database: PolyDatabase,
db_name: &str,
) -> anyhow::Result<()> {
database.clear(wtxn).with_context(|| format!("While clearing the {db_name:?} database"))
}
/// Exports a dump into the dump directory.
fn export_a_dump(
db_path: PathBuf,
dump_dir: PathBuf,
skip_enqueued_tasks: bool,
) -> Result<(), anyhow::Error> {
let started_at = OffsetDateTime::now_utc();
// 1. Extracts the instance UID from disk
let instance_uid_path = db_path.join("instance-uid");
let instance_uid = match read_to_string(&instance_uid_path) {
Ok(content) => match content.trim().parse() {
Ok(uuid) => Some(uuid),
Err(e) => {
eprintln!("Impossible to parse instance-uid: {e}");
None
}
},
Err(e) => {
eprintln!("Impossible to read {}: {}", instance_uid_path.display(), e);
None
}
};
let dump = DumpWriter::new(instance_uid).context("While creating a new dump")?;
let file_store =
FileStore::new(db_path.join("update_files")).context("While opening the FileStore")?;
let index_scheduler_path = db_path.join("tasks");
let env = EnvOpenOptions::new()
.max_dbs(100)
.open(&index_scheduler_path)
.with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
eprintln!("Dumping the keys...");
// 2. dump the keys
let auth_store = AuthController::new(&db_path, &None)
.with_context(|| format!("While opening the auth store at {}", db_path.display()))?;
let mut dump_keys = dump.create_keys()?;
let mut count = 0;
for key in auth_store.list_keys()? {
dump_keys.push_key(&key)?;
count += 1;
}
dump_keys.flush()?;
eprintln!("Successfully dumped {count} keys!");
let rtxn = env.read_txn()?;
let all_tasks: Database<OwnedType<BEU32>, SerdeJson<Task>> =
try_opening_database(&env, &rtxn, "all-tasks")?;
let index_mapping: Database<Str, UuidCodec> =
try_opening_database(&env, &rtxn, "index-mapping")?;
if skip_enqueued_tasks {
eprintln!("Skip dumping the enqueued tasks...");
} else {
eprintln!("Dumping the enqueued tasks...");
// 3. dump the tasks
let mut dump_tasks = dump.create_tasks_queue()?;
let mut count = 0;
for ret in all_tasks.iter(&rtxn)? {
let (_, t) = ret?;
let status = t.status;
let content_file = t.content_uuid();
let mut dump_content_file = dump_tasks.push_task(&t.into())?;
// 3.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
if let Some(content_file_uuid) = content_file {
if status == Status::Enqueued {
let content_file = file_store.get_update(content_file_uuid)?;
let reader =
DocumentsBatchReader::from_reader(content_file).with_context(|| {
format!("While reading content file {:?}", content_file_uuid)
})?;
let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
while let Some(doc) = cursor.next_document().with_context(|| {
format!("While iterating on content file {:?}", content_file_uuid)
})? {
dump_content_file
.push_document(&obkv_to_object(&doc, &documents_batch_index)?)?;
}
dump_content_file.flush()?;
count += 1;
}
}
}
dump_tasks.flush()?;
eprintln!("Successfully dumped {count} enqueued tasks!");
}
eprintln!("Dumping the indexes...");
// 4. Dump the indexes
let mut count = 0;
for result in index_mapping.iter(&rtxn)? {
let (uid, uuid) = result?;
let index_path = db_path.join("indexes").join(uuid.to_string());
let index = Index::new(EnvOpenOptions::new(), &index_path).with_context(|| {
format!("While trying to open the index at path {:?}", index_path.display())
})?;
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
primary_key: index.primary_key(&rtxn)?.map(String::from),
created_at: index.created_at(&rtxn)?,
updated_at: index.updated_at(&rtxn)?,
};
let mut index_dumper = dump.create_index(uid, &metadata)?;
let fields_ids_map = index.fields_ids_map(&rtxn)?;
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
// 4.1. Dump the documents
for ret in index.all_documents(&rtxn)? {
let (_id, doc) = ret?;
let document = obkv_to_json(&all_fields, &fields_ids_map, doc)?;
index_dumper.push_document(&document)?;
}
// 4.2. Dump the settings
let settings = meilisearch_types::settings::settings(&index, &rtxn)?;
index_dumper.settings(&settings)?;
count += 1;
}
eprintln!("Successfully dumped {count} indexes!");
// We will not dump experimental feature settings
eprintln!("The tool is not dumping experimental features, please set them by hand afterward");
let dump_uid = started_at.format(format_description!(
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
)).unwrap();
let path = dump_dir.join(format!("{}.dump", dump_uid));
let file = File::create(&path)?;
dump.persist_to(BufWriter::new(file))?;
eprintln!("Dump exported at path {:?}", path.display());
Ok(())
}
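
Since clap derives kebab-case names for the commands above, the binary is expected to be invoked as `meilitool --db-path data.ms/ export-a-dump --skip-enqueued-tasks`; the exact spellings are an assumption based on clap's default renaming. A minimal sketch exercising the parser from Rust, assuming the Cli and Command items above are in scope:

fn parse_meilitool_args() {
    use clap::Parser;
    // Hypothetical invocation, equivalent to the command line quoted above.
    let cli = Cli::parse_from([
        "meilitool",
        "--db-path", "data.ms/",
        "export-a-dump",
        "--skip-enqueued-tasks",
    ]);
    assert!(matches!(cli.command, Command::ExportADump { skip_enqueued_tasks: true, .. }));
}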

View File

@ -0,0 +1,24 @@
use std::borrow::Cow;
use std::convert::TryInto;
use meilisearch_types::heed::{BytesDecode, BytesEncode};
use uuid::Uuid;
/// A heed codec for value of struct Uuid.
pub struct UuidCodec;
impl<'a> BytesDecode<'a> for UuidCodec {
type DItem = Uuid;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
bytes.try_into().ok().map(Uuid::from_bytes)
}
}
impl BytesEncode<'_> for UuidCodec {
type EItem = Uuid;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
}
}
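
A quick round-trip sketch for the codec above, assuming heed's BytesEncode and BytesDecode traits are in scope as they are in uuid_codec.rs:

fn uuid_codec_round_trip() {
    let uuid = Uuid::new_v4();
    // Encoding borrows the 16 raw bytes of the UUID...
    let bytes = UuidCodec::bytes_encode(&uuid).unwrap();
    // ...and decoding rebuilds the same value from them.
    assert_eq!(UuidCodec::bytes_decode(&bytes), Some(uuid));
}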

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.4.0"
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
byteorder = "1.4.3"
charabia = { version = "0.8.3", default-features = false }
charabia = { version = "0.8.5", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.8"
deserr = { version = "0.6.0", features = ["actix-web"]}
@ -26,8 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = [
"tempfile",
grenad = { version = "0.4.5", default-features = false, features = [
"rayon", "tempfile"
] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
"lmdb", "read-txn-no-tls"
@ -79,10 +79,11 @@ big_s = "1.0.2"
insta = "1.29.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }
[features]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek", "charabia/khmer"]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
@ -106,3 +107,6 @@ thai = ["charabia/thai"]
# allow greek specialized tokenization
greek = ["charabia/greek"]
# allow khmer specialized tokenization
khmer = ["charabia/khmer"]

View File

@ -58,7 +58,6 @@ fn main() -> Result<(), Box<dyn Error>> {
false,
&None,
&None,
&None,
GeoSortStrategy::default(),
0,
20,

View File

@ -3,7 +3,7 @@ use heed::EnvOpenOptions;
// use maplit::hashset;
use milli::{
update::{IndexerConfig, Settings},
Index, RankingRule,
Criterion, Index,
};
fn main() {
@ -19,13 +19,13 @@ fn main() {
// builder.set_min_word_len_one_typo(5);
// builder.set_min_word_len_two_typos(7);
// builder.set_sortable_fields(hashset! { S("release_date") });
builder.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Proximity,
RankingRule::Attribute,
RankingRule::Sort,
RankingRule::Exactness,
builder.set_criteria(vec![
Criterion::Words,
Criterion::Typo,
Criterion::Proximity,
Criterion::Attribute,
Criterion::Sort,
Criterion::Exactness,
]);
builder.execute(|_| (), || false).unwrap();

View File

@ -8,7 +8,7 @@ use thiserror::Error;
use crate::error::is_reserved_keyword;
use crate::search::facet::BadGeoError;
use crate::{Error, RankingRuleError, UserError};
use crate::{CriterionError, Error, UserError};
/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
@ -28,23 +28,23 @@ impl From<BadGeoError> for AscDescError {
}
}
impl From<AscDescError> for RankingRuleError {
impl From<AscDescError> for CriterionError {
fn from(error: AscDescError) -> Self {
match error {
AscDescError::GeoError(_) => {
RankingRuleError::ReservedNameForSort { name: "_geoPoint".to_string() }
CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
}
AscDescError::InvalidSyntax { name } => RankingRuleError::InvalidName { name },
AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name },
AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
RankingRuleError::ReservedNameForSort { name: "_geoPoint".to_string() }
CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
}
AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
RankingRuleError::ReservedNameForFilter { name: "_geoRadius".to_string() }
CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() }
}
AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
RankingRuleError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
}
AscDescError::ReservedKeyword { name } => RankingRuleError::ReservedName { name },
AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name },
}
}
}
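
The mapping above is what surfaces when a reserved geo expression is used as a ranking rule; a minimal sketch of the observable behaviour, assuming Criterion and CriterionError are in scope:

fn geo_expression_rejected_as_ranking_rule() {
    use std::str::FromStr;
    // `_geoPoint(..)` is reserved for sort expressions, so parsing it as a
    // ranking rule yields the reserved-name error produced by the conversion above.
    let err = Criterion::from_str("_geoPoint(37.8,-122.4):asc").unwrap_err();
    assert!(matches!(err, CriterionError::ReservedNameForSort { .. }));
}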

View File

@ -7,7 +7,7 @@ use thiserror::Error;
use crate::{AscDesc, Member};
#[derive(Error, Debug)]
pub enum RankingRuleError {
pub enum CriterionError {
#[error("`{name}` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.")]
InvalidName { name: String },
#[error("`{name}` is a reserved keyword and thus can't be used as a ranking rule")]
@ -25,9 +25,7 @@ pub enum RankingRuleError {
}
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum RankingRule {
/// Sorted by documents matching the given filter and then documents not matching it.
FilterBoosting(String),
pub enum Criterion {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute are considered better than if they were at the back.
Words,
@ -49,68 +47,62 @@ pub enum RankingRule {
Desc(String),
}
impl RankingRule {
impl Criterion {
/// Returns the field name parameter of this criterion.
pub fn field_name(&self) -> Option<&str> {
match self {
RankingRule::Asc(name) | RankingRule::Desc(name) => Some(name),
Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
_otherwise => None,
}
}
}
impl FromStr for RankingRule {
type Err = RankingRuleError;
impl FromStr for Criterion {
type Err = CriterionError;
fn from_str(text: &str) -> Result<RankingRule, Self::Err> {
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
match text {
"words" => Ok(RankingRule::Words),
"typo" => Ok(RankingRule::Typo),
"proximity" => Ok(RankingRule::Proximity),
"attribute" => Ok(RankingRule::Attribute),
"sort" => Ok(RankingRule::Sort),
"exactness" => Ok(RankingRule::Exactness),
text => match AscDesc::from_str(text) {
Ok(asc_desc) => match asc_desc {
AscDesc::Asc(Member::Field(field)) => Ok(RankingRule::Asc(field)),
AscDesc::Desc(Member::Field(field)) => Ok(RankingRule::Desc(field)),
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
Err(RankingRuleError::ReservedNameForSort {
name: "_geoPoint".to_string(),
})?
}
},
Err(err) => Err(err.into()),
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"sort" => Ok(Criterion::Sort),
"exactness" => Ok(Criterion::Exactness),
text => match AscDesc::from_str(text)? {
AscDesc::Asc(Member::Field(field)) => Ok(Criterion::Asc(field)),
AscDesc::Desc(Member::Field(field)) => Ok(Criterion::Desc(field)),
AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
Err(CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() })?
}
},
}
}
}
pub fn default_criteria() -> Vec<RankingRule> {
pub fn default_criteria() -> Vec<Criterion> {
vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Proximity,
RankingRule::Attribute,
RankingRule::Sort,
RankingRule::Exactness,
Criterion::Words,
Criterion::Typo,
Criterion::Proximity,
Criterion::Attribute,
Criterion::Sort,
Criterion::Exactness,
]
}
impl fmt::Display for RankingRule {
impl fmt::Display for Criterion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use RankingRule::*;
use Criterion::*;
match self {
Words => f.write_str("words"),
FilterBoosting(_) => write!(f, "filterBoosting"),
Typo => f.write_str("typo"),
Proximity => f.write_str("proximity"),
Attribute => f.write_str("attribute"),
Sort => f.write_str("sort"),
Exactness => f.write_str("exactness"),
Asc(attr) => write!(f, "{attr}:asc"),
Desc(attr) => write!(f, "{attr}:desc"),
Asc(attr) => write!(f, "{}:asc", attr),
Desc(attr) => write!(f, "{}:desc", attr),
}
}
}
@ -118,29 +110,29 @@ impl fmt::Display for RankingRule {
#[cfg(test)]
mod tests {
use big_s::S;
use RankingRuleError::*;
use CriterionError::*;
use super::*;
#[test]
fn parse_criterion() {
let valid_criteria = [
("words", RankingRule::Words),
("typo", RankingRule::Typo),
("proximity", RankingRule::Proximity),
("attribute", RankingRule::Attribute),
("sort", RankingRule::Sort),
("exactness", RankingRule::Exactness),
("price:asc", RankingRule::Asc(S("price"))),
("price:desc", RankingRule::Desc(S("price"))),
("price:asc:desc", RankingRule::Desc(S("price:asc"))),
("truc:machin:desc", RankingRule::Desc(S("truc:machin"))),
("hello-world!:desc", RankingRule::Desc(S("hello-world!"))),
("it's spacy over there:asc", RankingRule::Asc(S("it's spacy over there"))),
("words", Criterion::Words),
("typo", Criterion::Typo),
("proximity", Criterion::Proximity),
("attribute", Criterion::Attribute),
("sort", Criterion::Sort),
("exactness", Criterion::Exactness),
("price:asc", Criterion::Asc(S("price"))),
("price:desc", Criterion::Desc(S("price"))),
("price:asc:desc", Criterion::Desc(S("price:asc"))),
("truc:machin:desc", Criterion::Desc(S("truc:machin"))),
("hello-world!:desc", Criterion::Desc(S("hello-world!"))),
("it's spacy over there:asc", Criterion::Asc(S("it's spacy over there"))),
];
for (input, expected) in valid_criteria {
let res = input.parse::<RankingRule>();
let res = input.parse::<Criterion>();
assert!(
res.is_ok(),
"Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
@ -175,7 +167,7 @@ mod tests {
];
for (input, expected) in invalid_criteria {
let res = input.parse::<RankingRule>();
let res = input.parse::<Criterion>();
assert!(
res.is_err(),
"Should no be able to parse `{}`, was expecting an error but instead got: `{:?}`",

View File

@ -1,5 +1,6 @@
mod builder;
mod enriched;
mod primary_key;
mod reader;
mod serde_impl;
@ -11,6 +12,7 @@ use bimap::BiHashMap;
pub use builder::DocumentsBatchBuilder;
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
use obkv::KvReader;
pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};
@ -87,6 +89,12 @@ impl DocumentsBatchIndex {
}
}
impl FieldIdMapper for DocumentsBatchIndex {
fn id(&self, name: &str) -> Option<FieldId> {
self.id(name)
}
}
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("Error parsing number {value:?} at line {line}: {error}")]

View File

@ -0,0 +1,172 @@
use std::iter;
use std::result::Result as StdResult;
use serde_json::Value;
use crate::{FieldId, InternalError, Object, Result, UserError};
/// The symbol used to define levels in a nested primary key.
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
/// The default primary key that is used when not specified.
pub const DEFAULT_PRIMARY_KEY: &str = "id";
/// Trait for objects that can map the name of a field to its [`FieldId`].
pub trait FieldIdMapper {
/// Attempts to map the passed name to its [`FieldId`].
///
/// `None` if the field with this name was not found.
fn id(&self, name: &str) -> Option<FieldId>;
}
/// A type that represents the kind of primary key that has been set
/// for this index: a classic flat one or a nested one.
#[derive(Debug, Clone, Copy)]
pub enum PrimaryKey<'a> {
Flat { name: &'a str, field_id: FieldId },
Nested { name: &'a str },
}
pub enum DocumentIdExtractionError {
InvalidDocumentId(UserError),
MissingDocumentId,
TooManyDocumentIds(usize),
}
impl<'a> PrimaryKey<'a> {
pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
Self::Nested { name: path }
} else {
let field_id = fields.id(path)?;
Self::Flat { name: path, field_id }
})
}
pub fn name(&self) -> &str {
match self {
PrimaryKey::Flat { name, .. } => name,
PrimaryKey::Nested { name } => name,
}
}
pub fn document_id(
&self,
document: &obkv::KvReader<FieldId>,
fields: &impl FieldIdMapper,
) -> Result<StdResult<String, DocumentIdExtractionError>> {
match self {
PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
Some(document_id_bytes) => {
let document_id = serde_json::from_slice(document_id_bytes)
.map_err(InternalError::SerdeJson)?;
match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(document_id)),
Err(user_error) => {
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
}
}
}
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
},
nested @ PrimaryKey::Nested { .. } => {
let mut matching_documents_ids = Vec::new();
for (first_level_name, right) in nested.possible_level_names() {
if let Some(field_id) = fields.id(first_level_name) {
if let Some(value_bytes) = document.get(field_id) {
let object = serde_json::from_slice(value_bytes)
.map_err(InternalError::SerdeJson)?;
fetch_matching_values(object, right, &mut matching_documents_ids);
if matching_documents_ids.len() >= 2 {
return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
matching_documents_ids.len(),
)));
}
}
}
}
match matching_documents_ids.pop() {
Some(document_id) => match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(document_id)),
Err(user_error) => {
Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
}
},
None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
}
}
}
}
/// Returns an `Iterator` that gives all the possible field names the primary key
/// can have depending on the first-level name and the depth of the objects.
pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
let name = self.name();
name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
.map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
.chain(iter::once((name, "")))
}
}
fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
match value {
Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
otherwise => output.push(otherwise),
}
}
fn fetch_matching_values_in_object(
object: Object,
selector: &str,
base_key: &str,
output: &mut Vec<Value>,
) {
for (key, value) in object {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
};
if starts_with(selector, &base_key) {
match value {
Value::Object(object) => {
fetch_matching_values_in_object(object, selector, &base_key, output)
}
value => output.push(value),
}
}
}
}
fn starts_with(selector: &str, key: &str) -> bool {
selector.strip_prefix(key).map_or(false, |tail| {
tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
})
}
// FIXME: move to a DocumentId struct
fn validate_document_id(document_id: &str) -> Option<&str> {
if !document_id.is_empty()
&& document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
{
Some(document_id)
} else {
None
}
}
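A minimal standalone illustration of the character rule enforced by `validate_document_id` just above; the helper name and the sample ids are invented for the example.

```rust
// Invented examples exercising the same character rule as `validate_document_id`:
// a valid id is non-empty and made only of alphanumerics, hyphens and underscores.
fn is_valid_document_id(id: &str) -> bool {
    !id.is_empty()
        && id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
}

fn main() {
    assert!(is_valid_document_id("movie_001"));
    assert!(is_valid_document_id("A-B_c9"));
    assert!(!is_valid_document_id(""));              // empty ids are rejected
    assert!(!is_valid_document_id("id with spaces")); // whitespace is rejected
    assert!(!is_valid_document_id("café"));           // non-ASCII characters are rejected
}
```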
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
match document_id {
Value::String(string) => match validate_document_id(&string) {
Some(s) if s.len() == string.len() => Ok(Ok(string)),
Some(s) => Ok(Ok(s.to_string())),
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
},
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
}
}
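To make the nested-primary-key handling in this new module easier to follow, here is a self-contained sketch of what `possible_level_names` yields; the free function and the `metadata.user.id` key are invented for illustration.

```rust
// Illustrative sketch (not part of the diff): how a nested primary key is split into
// candidate (first level, remainder) pairs, mirroring `PrimaryKey::possible_level_names`.
fn possible_level_names(name: &str) -> Vec<(&str, &str)> {
    const SPLIT: char = '.';
    name.match_indices(SPLIT)
        .map(|(i, _)| (&name[..i], &name[i + SPLIT.len_utf8()..]))
        .chain(std::iter::once((name, "")))
        .collect()
}

fn main() {
    let pairs = possible_level_names("metadata.user.id");
    // [("metadata", "user.id"), ("metadata.user", "id"), ("metadata.user.id", "")]
    println!("{pairs:?}");
}
```

Each candidate first-level name is then looked up in the fields-ids map, and the remainder is used as the selector when walking the document object.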

View File

@ -9,7 +9,7 @@ use serde_json::Value;
use thiserror::Error;
use crate::documents::{self, DocumentsBatchCursorError};
use crate::{DocumentId, FieldId, Object, RankingRuleError, SortError};
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
pub fn is_reserved_keyword(keyword: &str) -> bool {
["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword)
@ -89,12 +89,10 @@ pub enum FieldIdMapMissingEntry {
#[derive(Error, Debug)]
pub enum UserError {
#[error("A soft deleted internal document id have been used: `{document_id}`.")]
AccessingSoftDeletedDocument { document_id: DocumentId },
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]
CriterionError(#[from] RankingRuleError),
CriterionError(#[from] CriterionError),
#[error("Maximum number of documents reached.")]
DocumentLimitReached,
#[error(
@ -116,8 +114,6 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
InvalidVectorsType { document_id: Value, value: Value },
#[error("{0}")]
InvalidFilter(String),
#[error("{0}")]
InvalidBoostingFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Attribute `{}` is not sortable. {}",
@ -282,7 +278,7 @@ error_from_sub_error! {
ThreadPoolBuildError => InternalError,
SerializationError => InternalError,
GeoError => UserError,
RankingRuleError => UserError,
CriterionError => UserError,
}
impl<E> From<grenad::Error<E>> for Error

View File

@ -1,159 +1,75 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::TryInto;
use std::{fmt, str};
use fst::map::IndexedValue;
use fst::{IntoStreamer, Streamer};
use roaring::RoaringBitmap;
use heed::types::{OwnedType, Str};
use heed::{Database, RoIter, RoTxn, RwTxn};
const DELETED_ID: u64 = u64::MAX;
use crate::{DocumentId, BEU32};
pub struct ExternalDocumentsIds<'a> {
pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
soft_deleted_docids: RoaringBitmap,
pub enum DocumentOperationKind {
Create,
Delete,
}
impl<'a> ExternalDocumentsIds<'a> {
pub fn new(
hard: fst::Map<Cow<'a, [u8]>>,
soft: fst::Map<Cow<'a, [u8]>>,
soft_deleted_docids: RoaringBitmap,
) -> ExternalDocumentsIds<'a> {
ExternalDocumentsIds { hard, soft, soft_deleted_docids }
}
pub struct DocumentOperation {
pub external_id: String,
pub internal_id: DocumentId,
pub kind: DocumentOperationKind,
}
pub fn into_static(self) -> ExternalDocumentsIds<'static> {
ExternalDocumentsIds {
hard: self.hard.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
soft: self.soft.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
soft_deleted_docids: self.soft_deleted_docids,
}
pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
impl ExternalDocumentsIds {
pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
ExternalDocumentsIds(db)
}
/// Returns `true` if hard and soft external documents lists are empty.
pub fn is_empty(&self) -> bool {
self.hard.is_empty() && self.soft.is_empty()
pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
self.0.is_empty(rtxn).map_err(Into::into)
}
pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
let external_id = external_id.as_ref();
match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
Some(id) if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) => {
Some(id.try_into().unwrap())
}
_otherwise => None,
}
}
/// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
/// don't contain any soft deleted document id.
pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
let mut new_hard_builder = fst::MapBuilder::memory();
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
while let Some((external_id, docids)) = iter.next() {
// prefer selecting the ids from soft, always
let id = indexed_last_value(docids).unwrap();
if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
new_hard_builder.insert(external_id, id)?;
}
}
drop(iter);
// Delete soft map completely
self.soft = fst::Map::default().map_data(Cow::Owned)?;
// We save the new map as the new hard map.
self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
Ok(())
}
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
let union_op = self.soft.op().add(other).r#union();
let mut new_soft_builder = fst::MapBuilder::memory();
let mut iter = union_op.into_stream();
while let Some((external_id, marked_docids)) = iter.next() {
let id = indexed_last_value(marked_docids).unwrap();
new_soft_builder.insert(external_id, id)?;
}
drop(iter);
// We save the new map as the new soft map.
self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
self.merge_soft_into_hard()
pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
}
/// A helper function to debug this type; returns a `HashMap` combining both
/// the soft and hard fst maps.
pub fn to_hash_map(&self) -> HashMap<String, u32> {
let mut map = HashMap::new();
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
while let Some((external_id, marked_docids)) = iter.next() {
let id = indexed_last_value(marked_docids).unwrap();
if id != DELETED_ID {
let external_id = str::from_utf8(external_id).unwrap();
map.insert(external_id.to_owned(), id.try_into().unwrap());
}
pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
let mut map = HashMap::default();
for result in self.0.iter(rtxn)? {
let (external, internal) = result?;
map.insert(external.to_owned(), internal.get());
}
map
Ok(map)
}
/// Return an fst of the combined hard and soft deleted ID.
pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
if self.soft.is_empty() {
return Ok(Cow::Borrowed(&self.hard));
}
let union_op = self.hard.op().add(&self.soft).r#union();
let mut iter = union_op.into_stream();
let mut new_hard_builder = fst::MapBuilder::memory();
while let Some((external_id, marked_docids)) = iter.next() {
let value = indexed_last_value(marked_docids).unwrap();
if value != DELETED_ID {
new_hard_builder.insert(external_id, value)?;
/// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
///
/// If the list contains multiple operations on the same external id, then the result is unspecified.
///
/// # Panics
///
/// - If attempting to delete a document that doesn't exist
/// - If attempting to create a document that already exists
pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
for DocumentOperation { external_id, internal_id, kind } in operations {
match kind {
DocumentOperationKind::Create => {
self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
}
DocumentOperationKind::Delete => {
if !self.0.delete(wtxn, &external_id)? {
panic!("Attempting to delete a non-existing document")
}
}
}
}
drop(iter);
Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
}
fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
if self.soft.len() >= self.hard.len() / 2 {
self.hard = self.to_fst()?.into_owned();
self.soft = fst::Map::default().map_data(Cow::Owned)?;
}
Ok(())
}
}
impl fmt::Debug for ExternalDocumentsIds<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
/// Returns an iterator over all the external ids.
pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
self.0.iter(rtxn)
}
}
impl Default for ExternalDocumentsIds<'static> {
fn default() -> Self {
ExternalDocumentsIds {
hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
soft_deleted_docids: RoaringBitmap::new(),
}
}
}
/// Returns the value of the `IndexedValue` with the highest _index_.
fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
}
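Since this file replaces the fst-based `ExternalDocumentsIds` with an LMDB database, a minimal in-memory model of the new `apply` semantics may help; the `HashMap` stand-in and the sample ids are assumptions, not the heed-backed implementation above.

```rust
use std::collections::HashMap;

// Invented stand-in mirroring the semantics of the new `ExternalDocumentsIds::apply`:
// Create inserts a mapping, Delete panics if the external id is unknown.
enum DocumentOperationKind { Create, Delete }

struct DocumentOperation {
    external_id: String,
    internal_id: u32,
    kind: DocumentOperationKind,
}

fn apply(map: &mut HashMap<String, u32>, operations: Vec<DocumentOperation>) {
    for DocumentOperation { external_id, internal_id, kind } in operations {
        match kind {
            DocumentOperationKind::Create => {
                map.insert(external_id, internal_id);
            }
            DocumentOperationKind::Delete => {
                if map.remove(&external_id).is_none() {
                    panic!("Attempting to delete a non-existing document")
                }
            }
        }
    }
}

fn main() {
    let mut ids = HashMap::new();
    apply(&mut ids, vec![
        DocumentOperation { external_id: "doc-1".into(), internal_id: 0, kind: DocumentOperationKind::Create },
        DocumentOperation { external_id: "doc-2".into(), internal_id: 1, kind: DocumentOperationKind::Create },
        DocumentOperation { external_id: "doc-1".into(), internal_id: 0, kind: DocumentOperationKind::Delete },
    ]);
    assert_eq!(ids.get("doc-2"), Some(&1));
}
```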

View File

@ -81,6 +81,12 @@ impl Default for FieldsIdsMap {
}
}
impl crate::documents::FieldIdMapper for FieldsIdsMap {
fn id(&self, name: &str) -> Option<FieldId> {
self.id(name)
}
}
#[cfg(test)]
mod tests {
use super::*;

View File

@ -6,6 +6,7 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
/// This is the limit where using a byteorder becomes less size-efficient
/// than using a direct roaring encoding; it is also the point where we are able
@ -60,12 +61,16 @@ impl CboRoaringBitmapCodec {
/// If the merged values' length is under the threshold, the values are directly
/// serialized in the buffer; otherwise a RoaringBitmap is created from the
/// values and serialized in the buffer.
pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
where
I: IntoIterator<Item = A>,
A: AsRef<[u8]>,
{
let mut roaring = RoaringBitmap::new();
let mut vec = Vec::new();
for bytes in slices {
if bytes.len() <= THRESHOLD * size_of::<u32>() {
if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
let mut reader = bytes.as_ref();
while let Ok(integer) = reader.read_u32::<NativeEndian>() {
vec.push(integer);
@ -85,7 +90,7 @@ impl CboRoaringBitmapCodec {
}
} else {
// We can unwrap safely because the vector is sorted above.
let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
roaring.serialize_into(buffer)?;
}
} else {
@ -95,6 +100,33 @@ impl CboRoaringBitmapCodec {
Ok(())
}
/// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>,
previous: &[u8],
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {
// Deserialize the bitmap that is already there
let mut previous = Self::deserialize_from(previous)?;
// Remove integers we no longer want in the previous bitmap
if let Some(value) = deladd.get(DelAdd::Deletion) {
previous -= Self::deserialize_from(value)?;
}
// Insert the new integers we want in the previous bitmap
if let Some(value) = deladd.get(DelAdd::Addition) {
previous |= Self::deserialize_from(value)?;
}
if previous.is_empty() {
return Ok(None);
}
Self::serialize_into(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
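A small self-contained sketch of the set arithmetic performed by `merge_deladd_into`, assuming only the `roaring` crate; the sample ids are invented.

```rust
use roaring::RoaringBitmap;

// Invented stand-in showing the arithmetic of `merge_deladd_into`: start from the
// previous bitmap, subtract the Deletion side, add the Addition side, and return
// `None` when the result ends up empty.
fn merge_deladd(
    previous: &RoaringBitmap,
    deletion: Option<&RoaringBitmap>,
    addition: Option<&RoaringBitmap>,
) -> Option<RoaringBitmap> {
    let mut result = previous.clone();
    if let Some(del) = deletion {
        result -= del;
    }
    if let Some(add) = addition {
        result |= add;
    }
    if result.is_empty() { None } else { Some(result) }
}

fn main() {
    let previous: RoaringBitmap = (0..5).collect();
    let deletion: RoaringBitmap = [1, 2].iter().copied().collect();
    let addition: RoaringBitmap = [10].iter().copied().collect();
    let merged = merge_deladd(&previous, Some(&deletion), Some(&addition)).unwrap();
    assert_eq!(merged.iter().collect::<Vec<_>>(), vec![0, 3, 4, 10]);
}
```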
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {

File diff suppressed because it is too large

View File

@ -9,6 +9,7 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
pub mod documents;
mod asc_desc;
mod criterion;
pub mod distance;
mod error;
mod external_documents_ids;
@ -17,7 +18,6 @@ mod fields_ids_map;
pub mod heed_codec;
pub mod index;
pub mod proximity;
mod ranking_rule;
mod readable_slices;
pub mod score_details;
mod search;
@ -44,6 +44,7 @@ use serde_json::Value;
pub use {charabia as tokenizer, heed};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};
pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
@ -56,7 +57,6 @@ pub use self::heed_codec::{
UncheckedU8StrStrCodec,
};
pub use self::index::Index;
pub use self::ranking_rule::{default_criteria, RankingRule, RankingRuleError};
pub use self::search::{
FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder,
MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy,

View File

@ -5,7 +5,6 @@ use crate::distance_between_two_points;
#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
Words(Words),
FilterBoosting(FilterBoosting),
Typo(Typo),
Proximity(Rank),
Fid(Rank),
@ -24,7 +23,6 @@ impl ScoreDetails {
pub fn rank(&self) -> Option<Rank> {
match self {
ScoreDetails::Words(details) => Some(details.rank()),
ScoreDetails::FilterBoosting(_) => None,
ScoreDetails::Typo(details) => Some(details.rank()),
ScoreDetails::Proximity(details) => Some(*details),
ScoreDetails::Fid(details) => Some(*details),
@ -62,11 +60,6 @@ impl ScoreDetails {
details_map.insert("words".into(), words_details);
order += 1;
}
ScoreDetails::FilterBoosting(FilterBoosting { matching }) => {
let sort_details = serde_json::json!({ "matching": matching });
details_map.insert("filterBoosting".into(), sort_details);
order += 1;
}
ScoreDetails::Typo(typo) => {
let typo_details = serde_json::json!({
"order": order,
@ -228,11 +221,6 @@ impl Words {
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FilterBoosting {
pub matching: bool,
}
/// Structure that is super similar to [`Words`], but whose semantics is a bit distinct.
///
/// In exactness, the number of matching words can actually be 0 with a non-zero score,

View File

@ -13,7 +13,7 @@ use crate::heed_codec::ByteSliceRefCodec;
/// The documents returned by the iterator are grouped by the facet values that
/// determined their rank. For example, given the documents:
///
/// ```ignore
/// ```text
/// 0: { "colour": ["blue", "green"] }
/// 1: { "colour": ["blue", "red"] }
/// 2: { "colour": ["orange", "red"] }
@ -22,7 +22,7 @@ use crate::heed_codec::ByteSliceRefCodec;
/// ```
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
/// over the following elements:
/// ```ignore
/// ```text
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
/// [3] // same for "green"
/// [2] // same for "orange"

View File

@ -3,7 +3,7 @@ use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use roaring::RoaringBitmap;
use serde_json::Value;
@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
// and finally we delete all the soft_deleted_documents, again, only once at the very end
self.inner_evaluate(rtxn, index, &filterable_fields)
.map(|result| result - soft_deleted_documents)
}
fn evaluate_operator(

View File

@ -11,7 +11,7 @@ use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWords};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::PartialSearchResult;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
@ -38,7 +38,6 @@ pub struct Search<'a> {
vector: Option<Vec<f32>>,
// this should be linked to the String in the query
filter: Option<Filter<'a>>,
boosting_filter: Option<Filter<'a>>,
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
@ -58,7 +57,6 @@ impl<'a> Search<'a> {
query: None,
vector: None,
filter: None,
boosting_filter: None,
offset: 0,
limit: 20,
sort_criteria: None,
@ -123,11 +121,6 @@ impl<'a> Search<'a> {
self
}
pub fn boosting_filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> {
self.boosting_filter = Some(condition);
self
}
#[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
self.geo_strategy = strategy;
@ -157,7 +150,6 @@ impl<'a> Search<'a> {
self.scoring_strategy,
self.exhaustive_number_hits,
&self.filter,
&self.boosting_filter,
&self.sort_criteria,
self.geo_strategy,
self.offset,
@ -183,7 +175,6 @@ impl fmt::Debug for Search<'_> {
query,
vector: _,
filter,
boosting_filter,
offset,
limit,
sort_criteria,
@ -200,7 +191,6 @@ impl fmt::Debug for Search<'_> {
.field("query", query)
.field("vector", &"[...]")
.field("filter", filter)
.field("boosting_filter", boosting_filter)
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)

View File

@ -46,9 +46,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
if let Some(distinct_fid) = distinct_fid {
let mut excluded = RoaringBitmap::new();
let mut results = vec![];
let mut skip = 0;
for docid in universe.iter() {
if results.len() >= length {
if results.len() >= from + length {
break;
}
if excluded.contains(docid) {
@ -56,16 +55,19 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
skip += 1;
if skip <= from {
continue;
}
results.push(docid);
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
// drain the results of the skipped elements
// this **must** be done **after** writing all the results into `all_candidates` to ensure
// e.g. estimatedTotalHits is correct.
if results.len() >= from {
results.drain(..from);
} else {
results.clear();
}
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
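A hedged model of the distinct-plus-pagination behaviour introduced above: collect up to `from + length` distinct results, account for every excluded candidate, and only then drain the first `from` entries. The `HashSet`-based helper and the colour data are invented.

```rust
// Invented model of the distinct + pagination fix: draining after the full scan keeps
// the candidate accounting (e.g. estimatedTotalHits) correct.
fn distinct_page(universe: &[(u32, &str)], from: usize, length: usize) -> Vec<u32> {
    use std::collections::HashSet;
    let mut seen = HashSet::new();
    let mut results = Vec::new();
    for &(docid, distinct_value) in universe {
        if results.len() >= from + length {
            break;
        }
        if !seen.insert(distinct_value) {
            continue; // a document with this distinct value was already kept
        }
        results.push(docid);
    }
    if results.len() >= from {
        results.drain(..from);
    } else {
        results.clear();
    }
    results
}

fn main() {
    let docs = [(0, "blue"), (1, "blue"), (2, "red"), (3, "green"), (4, "red")];
    // page 2 of size 1 over distinct colours: blue (doc 0) is skipped, red (doc 2) is returned
    assert_eq!(distinct_page(&docs, 1, 1), vec![2]);
}
```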

View File

@ -12,7 +12,7 @@ use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
};
/// A cache storing pointers to values in the LMDB databases.
@ -25,7 +25,7 @@ pub struct DatabaseCache<'ctx> {
pub word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
FxHashMap<(u8, Interned<String>, Interned<String>), Option<RoaringBitmap>>,
pub prefix_word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
@ -168,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
@ -182,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
word: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
word,
self.word_interner.get(word).as_str(),
@ -230,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
merge_cbo_roaring_bitmaps,
)
}
None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@ -244,7 +244,7 @@ impl<'ctx> SearchContext<'ctx> {
&mut self,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
@ -297,35 +297,47 @@ impl<'ctx> SearchContext<'ctx> {
prefix2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, word1, prefix2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
),
&mut self.db_cache.word_prefix_pair_proximity_docids,
self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
let docids = match self
.db_cache
.word_prefix_pair_proximity_docids
.entry((proximity, word1, prefix2))
{
Entry::Occupied(docids) => docids.get().clone(),
Entry::Vacant(entry) => {
// compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
))
.unwrap()
.into_owned();
let mut prefix_docids = RoaringBitmap::new();
let remap_key_type = self
.index
.word_pair_proximity_docids
.remap_key_type::<ByteSlice>()
.prefix_iter(self.txn, &key)?;
for result in remap_key_type {
let (_, docids) = result?;
prefix_docids |= docids;
}
entry.insert(Some(prefix_docids.clone()));
Some(prefix_docids)
}
};
Ok(docids)
}
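Since the dedicated word-prefix database is dropped here in favour of a prefix scan over `word_pair_proximity_docids`, below is an invented in-memory model of that union; the `BTreeMap` stand-in is an assumption, not the heed/LMDB code above.

```rust
use std::collections::BTreeMap;

// Invented model: union the posting lists of every (proximity, word1, word2) key
// whose `word2` starts with the requested prefix.
fn prefix_union(
    db: &BTreeMap<(u8, String, String), Vec<u32>>,
    proximity: u8,
    word1: &str,
    prefix: &str,
) -> Vec<u32> {
    let mut docids = Vec::new();
    for ((p, w1, w2), ids) in db {
        if *p == proximity && w1 == word1 && w2.starts_with(prefix) {
            docids.extend_from_slice(ids);
        }
    }
    docids.sort_unstable();
    docids.dedup();
    docids
}

fn main() {
    let mut db = BTreeMap::new();
    db.insert((1, "best".to_string(), "winter".to_string()), vec![1, 2]);
    db.insert((1, "best".to_string(), "wine".to_string()), vec![2, 3]);
    db.insert((2, "best".to_string(), "winter".to_string()), vec![4]);
    assert_eq!(prefix_union(&db, 1, "best", "wi"), vec![1, 2, 3]);
}
```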
pub fn get_db_prefix_word_pair_proximity_docids(
&mut self,
left_prefix: Interned<String>,
right: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
self.txn,
(proximity, left_prefix, right),
&(
proximity,
self.word_interner.get(left_prefix).as_str(),
self.word_interner.get(right).as_str(),
),
&mut self.db_cache.prefix_word_pair_proximity_docids,
self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
)
// only accept exact matches on reverted positions
self.get_db_word_pair_proximity_docids(left_prefix, right, proximity)
}
pub fn get_db_word_fid_docids(

View File

@ -1,79 +0,0 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait, SearchContext};
use crate::score_details::{self, ScoreDetails};
use crate::{Filter, Result};
pub struct FilterBoosting<'f, Query> {
filter: Filter<'f>,
original_query: Option<Query>,
matching: Option<RankingRuleOutput<Query>>,
non_matching: Option<RankingRuleOutput<Query>>,
}
impl<'f, Query> FilterBoosting<'f, Query> {
pub fn new(filter: Filter<'f>) -> Result<Self> {
Ok(Self { filter, original_query: None, matching: None, non_matching: None })
}
}
impl<'ctx, 'f, Query: RankingRuleQueryTrait> RankingRule<'ctx, Query>
for FilterBoosting<'f, Query>
{
fn id(&self) -> String {
// TODO improve this
let Self { filter: original_expression, .. } = self;
format!("boost:{original_expression:?}")
}
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
parent_candidates: &RoaringBitmap,
parent_query: &Query,
) -> Result<()> {
let universe_matching = match self.filter.evaluate(ctx.txn, ctx.index) {
Ok(documents) => documents,
Err(e) => return Err(e), // TODO manage the invalid_search_boosting_filter
};
let matching = parent_candidates & universe_matching;
let non_matching = parent_candidates - &matching;
self.original_query = Some(parent_query.clone());
self.matching = Some(RankingRuleOutput {
query: parent_query.clone(),
candidates: matching,
score: ScoreDetails::FilterBoosting(score_details::FilterBoosting { matching: true }),
});
self.non_matching = Some(RankingRuleOutput {
query: parent_query.clone(),
candidates: non_matching,
score: ScoreDetails::FilterBoosting(score_details::FilterBoosting { matching: false }),
});
Ok(())
}
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
_universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>> {
Ok(self.matching.take().or_else(|| self.non_matching.take()))
}
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Query>,
) {
self.original_query = None;
self.matching = None;
self.non_matching = None;
}
}

View File

@ -512,7 +512,6 @@ mod tests {
false,
&None,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,

View File

@ -15,7 +15,6 @@ mod resolve_query_graph;
mod small_bitmap;
mod exact_attribute;
mod filter_boosting;
mod sort;
#[cfg(test)]
@ -27,7 +26,6 @@ use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::TokenizerBuilder;
use db_cache::DatabaseCache;
use exact_attribute::ExactAttribute;
use filter_boosting::FilterBoosting;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
use instant_distance::Search;
@ -192,30 +190,25 @@ fn resolve_universe(
}
/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx, 'f: 'ctx>(
fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
boosting_filter: &Option<Filter<'f>>,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
let mut sort = false;
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
let mut ranking_rules: Vec<BoxRankingRule<_>> = match boosting_filter {
Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)],
None => Vec::new(),
};
let mut ranking_rules: Vec<BoxRankingRule<PlaceholderQuery>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
match rr {
// These rules need a query to have an effect; ignore them in placeholder search
crate::RankingRule::FilterBoosting(_)
| crate::RankingRule::Words
| crate::RankingRule::Typo
| crate::RankingRule::Attribute
| crate::RankingRule::Proximity
| crate::RankingRule::Exactness => continue,
crate::RankingRule::Sort => {
crate::Criterion::Words
| crate::Criterion::Typo
| crate::Criterion::Attribute
| crate::Criterion::Proximity
| crate::Criterion::Exactness => continue,
crate::Criterion::Sort => {
if sort {
continue;
}
@ -229,14 +222,14 @@ fn get_ranking_rules_for_placeholder_search<'ctx, 'f: 'ctx>(
)?;
sort = true;
}
crate::RankingRule::Asc(field_name) => {
crate::Criterion::Asc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::RankingRule::Desc(field_name) => {
crate::Criterion::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
@ -249,12 +242,11 @@ fn get_ranking_rules_for_placeholder_search<'ctx, 'f: 'ctx>(
}
/// Return the list of initialised ranking rules to be used for a query graph search.
fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
fn get_ranking_rules_for_query_graph_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
terms_matching_strategy: TermsMatchingStrategy,
boosting_filter: &Option<Filter<'f>>,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
// query graph search
let mut words = false;
@ -271,18 +263,15 @@ fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = match boosting_filter {
Some(filter) => vec![Box::new(FilterBoosting::new(filter.clone())?)],
None => Vec::new(),
};
let mut ranking_rules: Vec<BoxRankingRule<QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
// Add Words before any of: typo, proximity, attribute
match rr {
crate::RankingRule::Typo
| crate::RankingRule::Attribute
| crate::RankingRule::Proximity
| crate::RankingRule::Exactness => {
crate::Criterion::Typo
| crate::Criterion::Attribute
| crate::Criterion::Proximity
| crate::Criterion::Exactness => {
if !words {
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
@ -291,33 +280,28 @@ fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
_ => {}
}
match rr {
crate::RankingRule::Words => {
crate::Criterion::Words => {
if words {
continue;
}
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
}
crate::RankingRule::FilterBoosting(_) => {
// it is not possible to define the filterBoosting ranking rule by hand
// or by using the settings. It is always inserted by the engine itself.
continue;
}
crate::RankingRule::Typo => {
crate::Criterion::Typo => {
if typo {
continue;
}
typo = true;
ranking_rules.push(Box::new(Typo::new(None)));
}
crate::RankingRule::Proximity => {
crate::Criterion::Proximity => {
if proximity {
continue;
}
proximity = true;
ranking_rules.push(Box::new(Proximity::new(None)));
}
crate::RankingRule::Attribute => {
crate::Criterion::Attribute => {
if attribute {
continue;
}
@ -325,7 +309,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
ranking_rules.push(Box::new(Fid::new(None)));
ranking_rules.push(Box::new(Position::new(None)));
}
crate::RankingRule::Sort => {
crate::Criterion::Sort => {
if sort {
continue;
}
@ -339,7 +323,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
)?;
sort = true;
}
crate::RankingRule::Exactness => {
crate::Criterion::Exactness => {
if exactness {
continue;
}
@ -347,15 +331,14 @@ fn get_ranking_rules_for_query_graph_search<'ctx, 'f: 'ctx>(
ranking_rules.push(Box::new(Exactness::new()));
exactness = true;
}
crate::RankingRule::Asc(field_name) => {
// TODO Question: Why would it be invalid to sort price:asc, typo, price:desc?
crate::Criterion::Asc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::RankingRule::Desc(field_name) => {
crate::Criterion::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
@ -423,15 +406,14 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
}
#[allow(clippy::too_many_arguments)]
pub fn execute_search<'ctx, 'f: 'ctx>(
ctx: &mut SearchContext<'ctx>,
pub fn execute_search(
ctx: &mut SearchContext,
query: &Option<String>,
vector: &Option<Vec<f32>>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
exhaustive_number_hits: bool,
filter: &Option<Filter>,
boosting_filter: &Option<Filter<'f>>,
filters: &Option<Filter>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
from: usize,
@ -440,8 +422,8 @@ pub fn execute_search<'ctx, 'f: 'ctx>(
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<PartialSearchResult> {
let mut universe = if let Some(filter) = filter {
filter.evaluate(ctx.txn, ctx.index)?
let mut universe = if let Some(filters) = filters {
filters.evaluate(ctx.txn, ctx.index)?
} else {
ctx.index.documents_ids(ctx.txn)?
};
@ -452,7 +434,18 @@ pub fn execute_search<'ctx, 'f: 'ctx>(
let mut search = Search::default();
let docids = match ctx.index.vector_hnsw(ctx.txn)? {
Some(hnsw) => {
if let Some(expected_size) = hnsw.iter().map(|(_, point)| point.len()).next() {
if vector.len() != expected_size {
return Err(UserError::InvalidVectorDimensions {
expected: expected_size,
found: vector.len(),
}
.into());
}
}
let vector = NDotProductPoint::new(vector.clone());
let neighbors = hnsw.search(&vector, &mut search);
let mut docids = Vec::new();
@ -534,7 +527,6 @@ pub fn execute_search<'ctx, 'f: 'ctx>(
sort_criteria,
geo_strategy,
terms_matching_strategy,
boosting_filter,
)?;
universe =
@ -551,13 +543,8 @@ pub fn execute_search<'ctx, 'f: 'ctx>(
query_graph_logger,
)?
} else {
let ranking_rules = get_ranking_rules_for_placeholder_search(
ctx,
sort_criteria,
geo_strategy,
boosting_filter,
)?;
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
bucket_sort(
ctx,
ranking_rules,
@ -604,8 +591,7 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
// We check that the sort ranking rule exists and throw an
// error if we try to use it and that it doesn't.
let sort_ranking_rule_missing =
!ctx.index.criteria(ctx.txn)?.contains(&crate::RankingRule::Sort);
let sort_ranking_rule_missing = !ctx.index.criteria(ctx.txn)?.contains(&crate::Criterion::Sort);
if sort_ranking_rule_missing {
return Err(UserError::SortRankingRuleMissing.into());
}

View File

@ -29,7 +29,7 @@ use std::hash::Hash;
pub use cheapest_paths::PathVisitor;
pub use condition_docids_cache::ConditionDocIdsCache;
pub use dead_ends_cache::DeadEndsCache;
pub use exactness::{ExactnessCondition, ExactnessGraph};
pub use exactness::ExactnessGraph;
pub use fid::{FidCondition, FidGraph};
pub use position::{PositionCondition, PositionGraph};
pub use proximity::{ProximityCondition, ProximityGraph};

View File

@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -12,7 +12,7 @@ fn create_index() -> TempIndex {
"description".to_owned(),
"plot".to_owned(),
]);
s.set_ranking_rules(vec![RankingRule::Attribute]);
s.set_criteria(vec![Criterion::Attribute]);
})
.unwrap();

View File

@ -1,5 +1,5 @@
use crate::index::tests::TempIndex;
use crate::{db_snap, RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{db_snap, Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -12,7 +12,7 @@ fn create_index() -> TempIndex {
"text2".to_owned(),
"other".to_owned(),
]);
s.set_ranking_rules(vec![RankingRule::Attribute]);
s.set_criteria(vec![Criterion::Attribute]);
})
.unwrap();

View File

@ -19,7 +19,7 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{AscDesc, Index, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -30,7 +30,7 @@ fn create_index() -> TempIndex {
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_sortable_fields(hashset! { S("rank1"), S("letter") });
s.set_distinct_field("letter".to_owned());
s.set_ranking_rules(vec![RankingRule::Words]);
s.set_criteria(vec![Criterion::Words]);
})
.unwrap();
@ -252,7 +252,7 @@ fn test_distinct_placeholder_sort() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Sort]);
s.set_criteria(vec![Criterion::Sort]);
})
.unwrap();
@ -387,7 +387,7 @@ fn test_distinct_words() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words]);
s.set_criteria(vec![Criterion::Words]);
})
.unwrap();
@ -440,11 +440,7 @@ fn test_distinct_sort_words() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![
RankingRule::Sort,
RankingRule::Words,
RankingRule::Desc(S("rank1")),
]);
s.set_criteria(vec![Criterion::Sort, Criterion::Words, Criterion::Desc(S("rank1"))]);
})
.unwrap();
@ -517,7 +513,7 @@ fn test_distinct_all_candidates() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Sort]);
s.set_criteria(vec![Criterion::Sort]);
})
.unwrap();
@ -540,7 +536,7 @@ fn test_distinct_typo() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
})
.unwrap();

View File

@ -21,7 +21,7 @@ Then these rules will only work with
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index_simple_ordered() -> TempIndex {
let index = TempIndex::new();
@ -30,7 +30,7 @@ fn create_index_simple_ordered() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Exactness]);
})
.unwrap();
@ -89,7 +89,7 @@ fn create_index_simple_reversed() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Exactness]);
})
.unwrap();
@ -147,7 +147,7 @@ fn create_index_simple_random() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Exactness]);
})
.unwrap();
@ -201,7 +201,7 @@ fn create_index_attribute_starts_with() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Exactness]);
})
.unwrap();
@ -251,7 +251,7 @@ fn create_index_simple_ordered_with_typos() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Exactness]);
})
.unwrap();
@ -350,11 +350,7 @@ fn create_index_with_varying_proximities() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -408,7 +404,7 @@ fn create_index_with_typo_and_prefix() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Exactness]);
})
.unwrap();
@ -446,11 +442,7 @@ fn create_index_all_equal_except_proximity_between_ignored_terms() -> TempIndex
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -706,7 +698,7 @@ fn test_exactness_after_words() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Exactness]);
s.set_criteria(vec![Criterion::Words, Criterion::Exactness]);
})
.unwrap();
@ -755,7 +747,7 @@ fn test_words_after_exactness() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Exactness, RankingRule::Words]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words]);
})
.unwrap();
@ -804,11 +796,7 @@ fn test_proximity_after_exactness() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -846,11 +834,7 @@ fn test_proximity_after_exactness() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Proximity,
]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -884,11 +868,7 @@ fn test_exactness_followed_by_typo_prefer_no_typo_prefix() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![
RankingRule::Exactness,
RankingRule::Words,
RankingRule::Typo,
]);
s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Typo]);
})
.unwrap();
@ -924,11 +904,7 @@ fn test_typo_followed_by_exactness() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Exactness,
]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Exactness]);
})
.unwrap();

View File

@ -9,7 +9,7 @@ use maplit::hashset;
use crate::index::tests::TempIndex;
use crate::score_details::ScoreDetails;
use crate::search::new::tests::collect_field_values;
use crate::{AscDesc, GeoSortStrategy, Member, RankingRule, Search, SearchResult};
use crate::{AscDesc, Criterion, GeoSortStrategy, Member, Search, SearchResult};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -18,7 +18,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_sortable_fields(hashset! { S("_geo") });
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Sort]);
s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
})
.unwrap();
index

View File

@ -6,10 +6,10 @@ use maplit::{btreemap, hashset};
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{db_snap, Index, Object, RankingRule};
use crate::{db_snap, Criterion, Index, Object};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
@ -20,7 +20,7 @@ pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_ranking_rules(criteria.to_vec());
builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
@ -70,6 +70,6 @@ pub fn setup_search_index_with_criteria(criteria: &[RankingRule]) -> Index {
#[test]
fn snapshot_integration_dataset() {
let index = setup_search_index_with_criteria(&[RankingRule::Attribute]);
let index = setup_search_index_with_criteria(&[Criterion::Attribute]);
db_snap!(index, word_position_docids, @"3c9347a767bceef3beb31465f1e5f3ae");
}

View File

@ -19,7 +19,7 @@ This module tests the following properties:
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -28,7 +28,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Words]);
s.set_criteria(vec![Criterion::Words]);
})
.unwrap();

View File

@ -19,7 +19,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_simple_index() -> TempIndex {
let index = TempIndex::new();
@ -28,7 +28,7 @@ fn create_simple_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -94,7 +94,7 @@ fn create_edge_cases_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best s");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 9, 6, 7, 8, 11, 12, 13, 15]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
@ -379,13 +379,13 @@ fn test_proximity_prefix_db() {
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best summer meal\"",
"\"summer best\"",
"\"this is the best meal of summer\"",
"\"summer x best\"",
"\"this is the best meal I have ever had in such a beautiful summer day\"",
"\"this is the best cooked meal of the summer\"",
"\"this is the best meal of the summer\"",
"\"summer x y best\"",
"\"summer x best\"",
"\"summer best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
]
"###);
@ -423,17 +423,17 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best win");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best winter meal\"",
"\"this is the best meal of winter\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"this is the best meal of winter\"",
"\"this is the best winter meal\"",
"\"winter x y best\"",
"\"winter x best\"",
"\"winter best\"",
@ -471,20 +471,20 @@ fn test_proximity_prefix_db() {
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.query("best wi");
let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
insta::assert_snapshot!(format!("{document_scores:#?}"));
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"this is the best winter meal\"",
"\"winter best\"",
"\"this is the best meal of winter\"",
"\"winter x best\"",
"\"this is the best meal I have ever had in such a beautiful winter day\"",
"\"this is the best cooked meal of the winter\"",
"\"this is the best meal of the winter\"",
"\"winter x y best\"",
"\"winter x best\"",
"\"winter best\"",
]
"###);
}

View File

@ -8,7 +8,7 @@ implemented.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -17,11 +17,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Proximity,
RankingRule::Typo,
]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity, Criterion::Typo]);
})
.unwrap();

View File

@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 2,
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),

View File

@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
},
),
],
[
Proximity(
Rank {
rank: 3,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 2,
rank: 1,
max_rank: 4,
},
),
],
[
Proximity(
Rank {
rank: 1,
max_rank: 4,
},
),

View File

@ -6,7 +6,7 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 1,
rank: 4,
max_rank: 4,
},
),
@ -14,7 +14,7 @@ expression: "format!(\"{document_scores:#?}\")"
[
Proximity(
Rank {
rank: 1,
rank: 2,
max_rank: 4,
},
),

View File

@ -13,11 +13,12 @@ This module tests the `sort` ranking rule:
use big_s::S;
use maplit::hashset;
use meili_snap::insta;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{
score_details, AscDesc, Member, RankingRule, Search, SearchResult, TermsMatchingStrategy,
score_details, AscDesc, Criterion, Member, Search, SearchResult, TermsMatchingStrategy,
};
fn create_index() -> TempIndex {
@ -28,7 +29,7 @@ fn create_index() -> TempIndex {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
s.set_ranking_rules(vec![RankingRule::Sort]);
s.set_criteria(vec![Criterion::Sort]);
})
.unwrap();
@ -331,7 +332,7 @@ fn test_redacted() {
.update_settings(|s| {
s.set_displayed_fields(vec!["text".to_owned(), "vague".to_owned()]);
s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
s.set_ranking_rules(vec![RankingRule::Sort]);
s.set_criteria(vec![Criterion::Sort]);
})
.unwrap();

View File

@ -22,7 +22,7 @@ use std::collections::BTreeMap;
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -31,7 +31,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Words]);
s.set_criteria(vec![Criterion::Words]);
})
.unwrap();
@ -457,7 +457,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Typo]);
s.set_criteria(vec![Criterion::Typo]);
})
.unwrap();
@ -495,7 +495,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Typo]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
})
.unwrap();
@ -540,7 +540,7 @@ fn test_typo_bucketing() {
drop(txn);
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Typo]);
s.set_criteria(vec![Criterion::Typo]);
})
.unwrap();
let txn = index.read_txn().unwrap();
@ -589,7 +589,7 @@ fn test_typo_synonyms() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Typo]);
s.set_criteria(vec![Criterion::Typo]);
let mut synonyms = BTreeMap::new();
synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);

View File

@ -17,7 +17,7 @@ because the typo ranking rule before it only used the derivation `beautiful`.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -26,11 +26,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![
RankingRule::Words,
RankingRule::Typo,
RankingRule::Proximity,
]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Proximity]);
})
.unwrap();

View File

@ -14,7 +14,7 @@ account by the proximity ranking rule.
use crate::index::tests::TempIndex;
use crate::search::new::tests::collect_field_values;
use crate::{RankingRule, Search, SearchResult, TermsMatchingStrategy};
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -23,7 +23,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_ranking_rules(vec![RankingRule::Words]);
s.set_criteria(vec![Criterion::Words]);
})
.unwrap();
@ -265,7 +265,7 @@ fn test_words_proximity_tms_last_simple() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -346,7 +346,7 @@ fn test_words_proximity_tms_last_phrase() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
})
.unwrap();
@ -416,7 +416,7 @@ fn test_words_tms_all() {
let index = create_index();
index
.update_settings(|s| {
s.set_ranking_rules(vec![RankingRule::Words, RankingRule::Proximity]);
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
})
.unwrap();


@ -4,9 +4,8 @@ use std::path::Path;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, obkv_to_json, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@ -98,7 +97,6 @@ Create a snapshot test of the given database.
- `facet_id_string_docids`
- `documents_ids`
- `stop_words`
- `soft_deleted_documents_ids`
- `field_distribution`
- `fields_ids_map`
- `geo_faceted_documents_ids`
@ -221,22 +219,6 @@ pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
&format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b))
})
}
pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
(proximity, word1, prefix),
b,
)| {
&format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))
})
}
pub fn snap_prefix_word_pair_proximity_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, prefix_word_pair_proximity_docids, |(
(proximity, prefix, word2),
b,
)| {
&format!("{proximity:<2} {prefix:<4} {word2:<16} {}", display_bitmap(&b))
})
}
pub fn snap_word_position_docids(index: &Index) -> String {
make_db_snap_from_iter!(index, word_position_docids, |((word, position), b)| {
&format!("{word:<16} {position:<6} {}", display_bitmap(&b))
@ -308,12 +290,6 @@ pub fn snap_stop_words(index: &Index) -> String {
let snap = format!("{stop_words:?}");
snap
}
pub fn snap_soft_deleted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap();
display_bitmap(&soft_deleted_documents_ids)
}
pub fn snap_field_distributions(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let mut snap = String::new();
@ -340,50 +316,21 @@ pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
}
pub fn snap_external_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let ExternalDocumentsIds { soft, hard, .. } = index.external_documents_ids(&rtxn).unwrap();
let external_ids = index.external_documents_ids().to_hash_map(&rtxn).unwrap();
// ensure fixed order (not guaranteed by hashmap)
let mut external_ids: Vec<(String, u32)> = external_ids.into_iter().collect();
external_ids.sort_by(|(l, _), (r, _)| l.cmp(r));
let mut snap = String::new();
writeln!(&mut snap, "soft:").unwrap();
let stream_soft = soft.stream();
let soft_external_ids = stream_soft.into_str_vec().unwrap();
for (key, id) in soft_external_ids {
writeln!(&mut snap, "{key:<24} {id}").unwrap();
}
writeln!(&mut snap, "hard:").unwrap();
let stream_hard = hard.stream();
let hard_external_ids = stream_hard.into_str_vec().unwrap();
for (key, id) in hard_external_ids {
writeln!(&mut snap, "docids:").unwrap();
for (key, id) in external_ids {
writeln!(&mut snap, "{key:<24} {id}").unwrap();
}
snap
}
pub fn snap_number_faceted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut snap = String::new();
for field_id in fields_ids_map.ids() {
let number_faceted_documents_ids =
index.faceted_documents_ids(&rtxn, field_id, FacetType::Number).unwrap();
writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&number_faceted_documents_ids))
.unwrap();
}
snap
}
pub fn snap_string_faceted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let mut snap = String::new();
for field_id in fields_ids_map.ids() {
let string_faceted_documents_ids =
index.faceted_documents_ids(&rtxn, field_id, FacetType::String).unwrap();
writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&string_faceted_documents_ids))
.unwrap();
}
snap
}
pub fn snap_words_fst(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let words_fst = index.words_fst(&rtxn).unwrap();
@ -516,9 +463,6 @@ macro_rules! full_snap_of_db {
($index:ident, stop_words) => {{
$crate::snapshot_tests::snap_stop_words(&$index)
}};
($index:ident, soft_deleted_documents_ids) => {{
$crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index)
}};
($index:ident, field_distribution) => {{
$crate::snapshot_tests::snap_field_distributions(&$index)
}};
@ -531,12 +475,6 @@ macro_rules! full_snap_of_db {
($index:ident, external_documents_ids) => {{
$crate::snapshot_tests::snap_external_documents_ids(&$index)
}};
($index:ident, number_faceted_documents_ids) => {{
$crate::snapshot_tests::snap_number_faceted_documents_ids(&$index)
}};
($index:ident, string_faceted_documents_ids) => {{
$crate::snapshot_tests::snap_string_faceted_documents_ids(&$index)
}};
($index:ident, words_fst) => {{
$crate::snapshot_tests::snap_words_fst(&$index)
}};


@ -8,16 +8,11 @@ pub struct AvailableDocumentsIds {
}
impl AvailableDocumentsIds {
pub fn from_documents_ids(
docids: &RoaringBitmap,
soft_deleted_docids: &RoaringBitmap,
) -> AvailableDocumentsIds {
let used_docids = docids | soft_deleted_docids;
match used_docids.max() {
pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
match docids.max() {
Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id);
available -= used_docids;
available -= docids;
let iter = match last_id.checked_add(1) {
Some(id) => id..=u32::max_value(),
@ -50,7 +45,7 @@ mod tests {
#[test]
fn empty() {
let base = RoaringBitmap::new();
let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = 0..=u32::max_value();
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
@ -63,28 +58,8 @@ mod tests {
base.insert(100);
base.insert(405);
let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
#[test]
fn soft_deleted() {
let mut base = RoaringBitmap::new();
base.insert(0);
base.insert(10);
base.insert(100);
base.insert(405);
let mut soft_deleted = RoaringBitmap::new();
soft_deleted.insert(1);
soft_deleted.insert(11);
soft_deleted.insert(101);
soft_deleted.insert(406);
let left = AvailableDocumentsIds::from_documents_ids(&base, &soft_deleted);
let right =
(0..=u32::max_value()).filter(|&n| ![0, 1, 10, 11, 100, 101, 405, 406].contains(&n));
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
}
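With the soft-deletion bitmap gone, the available-ids computation above reduces to "every id that is not already used". A standalone sketch of that logic, using only the public `roaring` crate (the free function and the `main` example are illustrative, not part of the diff):

```rust
use roaring::RoaringBitmap;

// Yield every id that is not in `docids`: first the holes below the current
// maximum, then every id above it (mirroring the `checked_add` in the diff).
fn available_ids(docids: &RoaringBitmap) -> impl Iterator<Item = u32> {
    let (holes, tail) = match docids.max() {
        Some(last_id) => {
            let mut available = RoaringBitmap::from_iter(0..last_id);
            available -= docids;
            (available, last_id.checked_add(1).map(|id| id..=u32::MAX))
        }
        None => (RoaringBitmap::new(), Some(0..=u32::MAX)),
    };
    holes.into_iter().chain(tail.into_iter().flatten())
}

fn main() {
    let mut used = RoaringBitmap::new();
    used.insert(0);
    used.insert(2);
    // 1 is a hole below the maximum, then everything above 2 is free.
    assert_eq!(available_ids(&used).take(3).collect::<Vec<_>>(), vec![1, 3, 4]);
}
```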


@ -1,8 +1,7 @@
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use crate::facet::FacetType;
use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
use crate::{FieldDistribution, Index, Result};
pub struct ClearDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -21,13 +20,12 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
let Index {
env: _env,
main: _main,
external_documents_ids,
word_docids,
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
word_pair_proximity_docids,
word_prefix_pair_proximity_docids,
prefix_word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
field_id_word_count_docids,
@ -51,43 +49,23 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
// We retrieve the number of documents ids that we are deleting.
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
// We clean some of the main engine datastructures.
self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
self.index.put_soft_deleted_documents_ids(self.wtxn, &empty_roaring)?;
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
self.index.delete_geo_rtree(self.wtxn)?;
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
self.index.delete_vector_hnsw(self.wtxn)?;
// We clean all the faceted documents ids.
for field_id in faceted_fields {
self.index.put_faceted_documents_ids(
self.wtxn,
field_id,
FacetType::Number,
&empty_roaring,
)?;
self.index.put_faceted_documents_ids(
self.wtxn,
field_id,
FacetType::String,
&empty_roaring,
)?;
}
// Clear the other databases.
external_documents_ids.clear(self.wtxn)?;
word_docids.clear(self.wtxn)?;
exact_word_docids.clear(self.wtxn)?;
word_prefix_docids.clear(self.wtxn)?;
exact_word_prefix_docids.clear(self.wtxn)?;
word_pair_proximity_docids.clear(self.wtxn)?;
word_prefix_pair_proximity_docids.clear(self.wtxn)?;
prefix_word_pair_proximity_docids.clear(self.wtxn)?;
word_position_docids.clear(self.wtxn)?;
word_fid_docids.clear(self.wtxn)?;
field_id_word_count_docids.clear(self.wtxn)?;
@ -140,7 +118,7 @@ mod tests {
assert!(index.words_fst(&rtxn).unwrap().is_empty());
assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
assert!(index.geo_rtree(&rtxn).unwrap().is_none());
@ -150,7 +128,6 @@ mod tests {
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap());
assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap());
assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());

milli/src/update/del_add.rs (normal file, 125 added lines)

@ -0,0 +1,125 @@
use obkv::Key;
pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>;
/// DelAdd defines the new value to add to the database and the old value to delete from it.
///
/// It's used in an OBKV to be serialized in grenad files.
#[repr(u8)]
#[derive(Clone, Copy, PartialOrd, PartialEq, Debug)]
pub enum DelAdd {
Deletion = 0,
Addition = 1,
}
impl Key for DelAdd {
const BYTES_SIZE: usize = std::mem::size_of::<DelAdd>();
type BYTES = [u8; Self::BYTES_SIZE];
fn to_be_bytes(&self) -> Self::BYTES {
u8::to_be_bytes(*self as u8)
}
fn from_be_bytes(array: Self::BYTES) -> Self {
match u8::from_be_bytes(array) {
0 => Self::Deletion,
1 => Self::Addition,
otherwise => unreachable!("DelAdd has only 2 variants, unknown variant: {}", otherwise),
}
}
}
/// Creates a Kv<K, Kv<DelAdd, value>> from a Kv<K, value>.
///
/// Deletion: put all the values under DelAdd::Deletion.
/// Addition: put all the values under DelAdd::Addition.
/// DeletionAndAddition: put all the values under both DelAdd::Deletion and DelAdd::Addition.
pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
reader: obkv::KvReader<K>,
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for (key, value) in reader.iter() {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
value_writer.insert(DelAdd::Deletion, value)?;
}
if matches!(operation, DelAddOperation::Addition | DelAddOperation::DeletionAndAddition) {
value_writer.insert(DelAdd::Addition, value)?;
}
value_writer.finish()?;
writer.insert(key, &value_buffer)?;
}
writer.finish()
}
/// Enum controlling the side of the DelAdd obkv in which the provided value will be written.
#[derive(Debug, Clone, Copy)]
pub enum DelAddOperation {
Deletion,
Addition,
DeletionAndAddition,
}
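To picture how this wrapping is meant to be consumed, here is a hypothetical unit-test-style sketch. It only reuses calls that appear elsewhere in this diff (`KvWriterU16`, `KvReader`, `KvReaderDelAdd`); the visibility of the `del_add` module from test code and the literal field id `0` are assumptions made for the example:

```rust
use obkv::{KvReader, KvWriterU16};

use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};

#[test]
fn wrap_a_plain_obkv_as_additions() {
    // A plain KV<u16, value> with a single field.
    let mut plain = Vec::new();
    let mut writer = KvWriterU16::new(&mut plain);
    writer.insert(0_u16, &b"blue"[..]).unwrap();
    writer.finish().unwrap();

    // Wrap every value into a KV<DelAdd, value>, tagged as an addition only.
    let mut del_add = Vec::new();
    into_del_add_obkv(KvReader::<u16>::new(&plain), DelAddOperation::Addition, &mut del_add)
        .unwrap();

    let wrapped = KvReader::<u16>::new(&del_add);
    let value = KvReaderDelAdd::new(wrapped.get(0).unwrap());
    assert_eq!(value.get(DelAdd::Deletion), None);
    assert_eq!(value.get(DelAdd::Addition), Some(&b"blue"[..]));
}
```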
/// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value>,
///
/// putting each deletion obkv's keys under a DelAdd::Deletion
/// and each addition obkv's keys under a DelAdd::Addition.
pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
deletion: obkv::KvReader<K>,
addition: obkv::KvReader<K>,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for eob in merge_join_by(deletion.iter(), addition.iter(), |(b, _), (u, _)| b.cmp(u)) {
value_buffer.clear();
match eob {
Left((k, v)) => {
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Deletion, v).unwrap();
writer.insert(k, value_writer.into_inner()?).unwrap();
}
Right((k, v)) => {
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Addition, v).unwrap();
writer.insert(k, value_writer.into_inner()?).unwrap();
}
Both((k, deletion), (_, addition)) => {
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Deletion, deletion).unwrap();
value_writer.insert(DelAdd::Addition, addition).unwrap();
writer.insert(k, value_writer.into_inner()?).unwrap();
}
}
}
writer.finish()
}
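The `merge_join_by` call above is what makes the diffing work: both obkvs are sorted by key, so one pass is enough to tell apart keys present only on the deletion side, only on the addition side, or on both. A minimal standalone sketch of the same pattern over plain sorted pairs, using only the `itertools` crate and made-up data:

```rust
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};

fn main() {
    // Two sorted "obkv-like" key/value lists: the old (deletion) side and the new (addition) side.
    let deletion = [(1u16, "red"), (2, "blue")];
    let addition = [(2u16, "blue"), (3, "green")];

    for eob in merge_join_by(deletion.iter(), addition.iter(), |(d, _), (a, _)| d.cmp(a)) {
        match eob {
            Left((k, v)) => println!("key {k}: only in the deletion side ({v})"),
            Right((k, v)) => println!("key {k}: only in the addition side ({v})"),
            Both((k, old), (_, new)) => println!("key {k}: present on both sides ({old} -> {new})"),
        }
    }
}
```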
pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd) -> bool {
del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition)
}
/// A function that extracts and returns the Add side of a DelAdd obkv.
/// This is useful when there is no previous value in the database and
/// therefore we don't need to do a diff with what's already there.
///
/// If there is no Add side we currently write an empty buffer
/// which is a valid CboRoaringBitmap.
#[allow(clippy::ptr_arg)] // required to avoid signature mismatch
pub fn deladd_serialize_add_side<'a>(
obkv: &'a [u8],
_buffer: &mut Vec<u8>,
) -> crate::Result<&'a [u8]> {
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
}
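A quick sanity-check sketch for the two helpers above (hypothetical test code, not part of the diff): an entry that carries the same bytes on its Deletion and Addition sides is a no-op, and `deladd_serialize_add_side` keeps only the Addition side.

```rust
#[cfg(test)]
mod sketch_tests {
    use super::*;

    #[test]
    fn noop_and_add_side() {
        let mut buffer = Vec::new();
        let mut writer = KvWriterDelAdd::new(&mut buffer);
        writer.insert(DelAdd::Deletion, &b"docids"[..]).unwrap();
        writer.insert(DelAdd::Addition, &b"docids"[..]).unwrap();
        writer.finish().unwrap();

        // Same bytes on both sides: nothing to change in the database.
        assert!(is_noop_del_add_obkv(KvReaderDelAdd::new(&buffer)));

        // Only the Addition side is kept when serializing the "add" side.
        let mut scratch = Vec::new();
        assert_eq!(deladd_serialize_add_side(&buffer, &mut scratch).unwrap(), &b"docids"[..]);
    }
}
```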

File diff suppressed because it is too large.


@ -1,10 +1,9 @@
use std::borrow::Cow;
use std::fs::File;
use std::io::BufReader;
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesEncode, Error, RoTxn, RwTxn};
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@ -13,17 +12,15 @@ use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".
///
/// First, the new elements are inserted into the level 0 of the database. Then, the
/// higher levels are cleared and recomputed from the content of level 0.
///
/// Finally, the `faceted_documents_ids` value in the main database of `Index`
/// is updated to contain the new set of faceted documents.
pub struct FacetsUpdateBulk<'i> {
index: &'i Index,
group_size: u8,
@ -31,7 +28,7 @@ pub struct FacetsUpdateBulk<'i> {
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
new_data: Option<grenad::Reader<BufReader<File>>>,
delta_data: Option<grenad::Reader<BufReader<File>>>,
}
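To give an intuition for the rebuild strategy described in the doc comment above: level 0 holds one entry per facet value, and each higher level stores one entry per `group_size` entries of the level below, presumably stopping once a level would fall under `min_level_size`. A rough, self-contained sketch of that level arithmetic (the helper name and the concrete numbers are made up; the real bounds are `FACET_GROUP_SIZE` and `FACET_MIN_LEVEL_SIZE`):

```rust
// How many entries each level of the facet tree ends up with, for a given
// number of level-0 entries: each level groups `group_size` entries of the
// level below, and we stop once a level would be smaller than `min_level_size`.
fn level_sizes(level0_len: u64, group_size: u64, min_level_size: u64) -> Vec<u64> {
    let mut levels = vec![level0_len];
    loop {
        let below = *levels.last().unwrap();
        let next = (below + group_size - 1) / group_size;
        if next < min_level_size {
            break;
        }
        levels.push(next);
    }
    levels
}

fn main() {
    // 1000 facet values, groups of 4, at least 5 entries per level.
    assert_eq!(level_sizes(1000, 4, 5), vec![1000, 250, 63, 16]);
}
```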
impl<'i> FacetsUpdateBulk<'i> {
@ -39,7 +36,7 @@ impl<'i> FacetsUpdateBulk<'i> {
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
@ -49,7 +46,7 @@ impl<'i> FacetsUpdateBulk<'i> {
group_size,
min_level_size,
facet_type,
new_data: Some(new_data),
delta_data: Some(delta_data),
}
}
@ -64,13 +61,13 @@ impl<'i> FacetsUpdateBulk<'i> {
group_size: FACET_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
new_data: None,
delta_data: None,
}
}
#[logging_timer::time("FacetsUpdateBulk::{}")]
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
let db = match facet_type {
FacetType::String => index
@ -81,12 +78,9 @@ impl<'i> FacetsUpdateBulk<'i> {
}
};
let inner = FacetsUpdateBulkInner { db, new_data, group_size, min_level_size };
let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size };
inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| {
index.put_faceted_documents_ids(wtxn, field_id, facet_type, &all_docids)?;
Ok(())
})?;
inner.update(wtxn, &field_ids)?;
Ok(())
}
@ -95,26 +89,19 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub new_data: Option<grenad::Reader<R>>,
pub delta_data: Option<grenad::Reader<R>>,
pub group_size: u8,
pub min_level_size: u8,
}
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
pub fn update(
mut self,
wtxn: &mut RwTxn,
field_ids: &[u16],
mut handle_all_docids: impl FnMut(&mut RwTxn, FieldId, RoaringBitmap) -> Result<()>,
) -> Result<()> {
pub fn update(mut self, wtxn: &mut RwTxn, field_ids: &[u16]) -> Result<()> {
self.update_level0(wtxn)?;
for &field_id in field_ids.iter() {
self.clear_levels(wtxn, field_id)?;
}
for &field_id in field_ids.iter() {
let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, wtxn)?;
handle_all_docids(wtxn, field_id, all_docids)?;
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?;
@ -133,19 +120,27 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
self.db.delete_range(wtxn, &range).map(drop)?;
Ok(())
}
fn update_level0(&mut self, wtxn: &mut RwTxn) -> Result<()> {
let new_data = match self.new_data.take() {
let delta_data = match self.delta_data.take() {
Some(x) => x,
None => return Ok(()),
};
if self.db.is_empty(wtxn)? {
let mut buffer = Vec::new();
let mut database = self.db.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
let mut cursor = new_data.into_cursor()?;
let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::new(value);
// DB is empty, it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else {
continue;
};
buffer.clear();
// the group size for level 0
buffer.push(1);
@ -157,11 +152,14 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
let mut buffer = Vec::new();
let database = self.db.remap_types::<ByteSlice, ByteSlice>();
let mut cursor = new_data.into_cursor()?;
let mut cursor = delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::new(value);
// the value is a CboRoaringBitmap, but I still need to prepend the
// group size for level 0 (= 1) to it
buffer.clear();
@ -169,17 +167,27 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
// then we extend the buffer with the docids bitmap
match database.get(wtxn, key)? {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
CboRoaringBitmapCodec::merge_into(
&[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)],
&mut buffer,
)?;
CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
}
None => {
// it is safe to ignore the del in that case.
let Some(value) = value.get(DelAdd::Addition) else {
// won't put the key in DB as the value would be empty
continue;
};
buffer.extend_from_slice(value);
}
};
database.put(wtxn, key, &buffer)?;
let new_bitmap = &buffer[1..];
// if the new bitmap is empty, let's remove it
if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
database.delete(wtxn, key)?;
} else {
database.put(wtxn, key, &buffer)?;
}
}
}
Ok(())
@ -188,16 +196,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
&self,
field_id: FieldId,
txn: &RoTxn,
) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
let mut all_docids = RoaringBitmap::new();
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
for bitmap in bitmaps {
all_docids |= bitmap;
}
Ok(())
})?;
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?;
Ok((subwriters, all_docids))
Ok(subwriters)
}
#[allow(clippy::type_complexity)]
fn read_level_0<'t>(
@ -491,7 +493,6 @@ mod tests {
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a");
db_snap!(index, number_faceted_documents_ids, "initial", @"01594fecbb316798ce3651d6730a4521");
}
#[test]


@ -1,360 +0,0 @@
use std::collections::{HashMap, HashSet};
use heed::RwTxn;
use log::debug;
use roaring::RoaringBitmap;
use time::OffsetDateTime;
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner};
use crate::{FieldId, Index, Result};
/// A builder used to remove elements from the `facet_id_string_docids` or `facet_id_f64_docids` databases.
///
/// Depending on the number of removed elements and the existing size of the database, we use either
/// a bulk delete method or an incremental delete method.
pub struct FacetsDelete<'i, 'b> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
docids_to_delete: &'b RoaringBitmap,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
}
impl<'i, 'b> FacetsDelete<'i, 'b> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
docids_to_delete: &'b RoaringBitmap,
) -> Self {
let database = match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
Self {
index,
database,
facet_type,
affected_facet_values,
docids_to_delete,
group_size: FACET_GROUP_SIZE,
max_group_size: FACET_MAX_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
}
}
pub fn execute(self, wtxn: &mut RwTxn) -> Result<()> {
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
for (field_id, affected_facet_values) in self.affected_facet_values {
// This is an incorrect condition, since we assume that the length of the database is equal
// to the number of facet values for the given field_id. It means that in some cases, we might
// wrongly choose the incremental indexer over the bulk indexer. But the only case where that could
// really be a performance problem is when we fully delete a large ratio of all facet values for
// each field id. This would almost never happen. Still, to be overly cautious, I have added a
// 2x penalty to the incremental indexer. That is, instead of assuming a 70x worst-case performance
// penalty to the incremental indexer, we assume a 150x worst-case performance penalty instead.
if affected_facet_values.len() >= (self.database.len(wtxn)? / 150) {
// Bulk delete
let mut modified = false;
for facet_value in affected_facet_values {
let key =
FacetGroupKey { field_id, level: 0, left_bound: facet_value.as_slice() };
let mut old = self.database.get(wtxn, &key)?.unwrap();
let previous_len = old.bitmap.len();
old.bitmap -= self.docids_to_delete;
if old.bitmap.is_empty() {
modified = true;
self.database.delete(wtxn, &key)?;
} else if old.bitmap.len() != previous_len {
modified = true;
self.database.put(wtxn, &key, &old)?;
}
}
if modified {
let builder = FacetsUpdateBulk::new_not_updating_level_0(
self.index,
vec![field_id],
self.facet_type,
);
builder.execute(wtxn)?;
}
} else {
// Incremental
let inc = FacetsUpdateIncrementalInner {
db: self.database,
group_size: self.group_size,
min_level_size: self.min_level_size,
max_group_size: self.max_group_size,
};
for facet_value in affected_facet_values {
inc.delete(wtxn, field_id, facet_value.as_slice(), self.docids_to_delete)?;
}
}
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::iter::FromIterator;
use big_s::S;
use maplit::hashset;
use rand::seq::SliceRandom;
use rand::SeedableRng;
use roaring::RoaringBitmap;
use crate::db_snap;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::ordered_string;
use crate::update::{DeleteDocuments, DeletionStrategy};
#[test]
fn delete_mixed_incremental_and_bulk() {
// The point of this test is to create an index populated with documents
// containing different filterable attributes. Then, we delete a bunch of documents
// such that a mix of the incremental and bulk indexer is used (depending on the field id)
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_filterable_fields(
hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
);
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"label": i / 10,
"colour": i / 100,
"timestamp": i / 2,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, 1, @"550cd138d6fe31ccdd42cd5392fbd576");
db_snap!(index, number_faceted_documents_ids, 1, @"9a0ea88e7c9dcf6dc0ef0b601736ffcf");
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
// - the "label" part will be updated incrementally, since #affected_facet_value = 10 which is < 13
// - the "colour" part will also be updated incrementally, since #affected_values = 1 which is < 13
// - the "timestamp" part will be updated in bulk, since #affected_values = 50 which is > 13
// This has to be verified manually by inserting breakpoint/adding print statements to the code when running the test
builder.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, @"[]");
db_snap!(index, facet_id_f64_docids, 2, @"d4d5f14e7f1e1f09b86821a0b6defcc6");
db_snap!(index, number_faceted_documents_ids, 2, @"3570e0ac0fdb21be9ebe433f59264b56");
}
// Same test as above but working with string values for the facets
#[test]
fn delete_mixed_incremental_and_bulk_string() {
// The point of this test is to create an index populated with documents
// containing different filterable attributes. Then, we delete a bunch of documents
// such that a mix of the incremental and bulk indexer is used (depending on the field id)
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_filterable_fields(
hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
);
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"label": ordered_string(i / 10),
"colour": ordered_string(i / 100),
"timestamp": ordered_string(i / 2),
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
// Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022)
db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503");
db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5");
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
// - the "label" part will be updated incrementally, since #affected_facet_value = 10 which is < 13
// - the "colour" part will also be updated incrementally, since #affected_values = 1 which is < 13
// - the "timestamp" part will be updated in bulk, since #affected_values = 50 which is > 13
// This has to be verified manually by inserting breakpoint/adding print statements to the code when running the test
builder.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, @"[]");
db_snap!(index, facet_id_string_docids, 2, @"7f9c00b29e04d58c1821202a5dda0ebc");
db_snap!(index, string_faceted_documents_ids, 2, @"504152afa5c94fd4e515dcdfa4c7161f");
}
#[test]
fn delete_almost_all_incrementally_string() {
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_filterable_fields(
hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
);
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"label": ordered_string(i / 10),
"colour": ordered_string(i / 100),
"timestamp": ordered_string(i / 2),
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
// Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022)
db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503");
db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5");
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let mut docids_to_delete = (0..1000).collect::<Vec<_>>();
docids_to_delete.shuffle(&mut rng);
for docid in docids_to_delete.into_iter().take(990) {
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter([docid]));
builder.execute().unwrap();
wtxn.commit().unwrap();
}
db_snap!(index, soft_deleted_documents_ids, @"[]");
db_snap!(index, facet_id_string_docids, 2, @"ece56086e76d50e661fb2b58475b9f7d");
db_snap!(index, string_faceted_documents_ids, 2, @r###"
0 []
1 [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ]
2 [292, 324, 358, 381, 493, 839, 852, ]
3 [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ]
"###);
}
}
#[allow(unused)]
#[cfg(test)]
mod comparison_bench {
use std::iter::once;
use rand::Rng;
use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::facet::test_helpers::FacetIndex;
// This is a simple test to get an intuition on the relative speed
// of the incremental vs. bulk indexer.
//
// The benchmark shows the worst-case scenario for the incremental indexer, since
// each facet value contains only one document ID.
//
// In that scenario, it appears that the incremental indexer is about 70 times slower than the
// bulk indexer.
// #[test]
fn benchmark_facet_indexing_delete() {
let mut r = rand::thread_rng();
for i in 1..=20 {
let size = 50_000 * i;
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..size {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, i as f64), once(i).collect()));
}
let timer = std::time::Instant::now();
index.bulk_insert(&mut txn, &[0], elements.iter());
let time_spent = timer.elapsed().as_millis();
println!("bulk {size} : {time_spent}ms");
txn.commit().unwrap();
for nbr_doc in [1, 100, 1000, 10_000] {
let mut txn = index.env.write_txn().unwrap();
let timer = std::time::Instant::now();
//
// delete one document
//
for _ in 0..nbr_doc {
let deleted_u32 = r.gen::<u32>() % size;
let deleted_f64 = deleted_u32 as f64;
index.delete_single_docid(&mut txn, 0, &deleted_f64, deleted_u32)
}
let time_spent = timer.elapsed().as_millis();
println!(" delete {nbr_doc} : {time_spent}ms");
txn.abort().unwrap();
}
}
}
}


@ -1,9 +1,9 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::BufReader;
use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use obkv::KvReader;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
@ -12,8 +12,9 @@ use crate::heed_codec::facet::{
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::update::del_add::DelAdd;
use crate::update::index_documents::valid_lmdb_key;
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
use crate::{CboRoaringBitmapCodec, Index, Result};
enum InsertionResult {
InPlace,
@ -28,27 +29,21 @@ enum DeletionResult {
/// Algorithm to incrementally insert and delete elements into the
/// `facet_id_(string/f64)_docids` databases.
///
/// The `faceted_documents_ids` value in the main database of `Index`
/// is also updated to contain the new set of faceted documents.
pub struct FacetsUpdateIncremental<'i> {
index: &'i Index,
pub struct FacetsUpdateIncremental {
inner: FacetsUpdateIncrementalInner,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
delta_data: grenad::Reader<BufReader<File>>,
}
impl<'i> FacetsUpdateIncremental<'i> {
impl FacetsUpdateIncremental {
pub fn new(
index: &'i Index,
index: &Index,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
min_level_size: u8,
max_group_size: u8,
) -> Self {
FacetsUpdateIncremental {
index,
inner: FacetsUpdateIncrementalInner {
db: match facet_type {
FacetType::String => index
@ -62,31 +57,41 @@ impl<'i> FacetsUpdateIncremental<'i> {
max_group_size,
min_level_size,
},
facet_type,
new_data,
delta_data,
}
}
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
let mut cursor = self.new_data.into_cursor()?;
pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
let mut cursor = self.delta_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
if !valid_lmdb_key(key) {
continue;
}
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
*new_faceted_docids.entry(key.field_id).or_default() |= docids;
let value = KvReader::new(value);
let docids_to_delete = value
.get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
let docids_to_add = value
.get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.ok_or(heed::Error::Encoding));
if let Some(docids_to_delete) = docids_to_delete {
let docids_to_delete = docids_to_delete?;
self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
}
if let Some(docids_to_add) = docids_to_add {
let docids_to_add = docids_to_add?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
}
}
for (field_id, new_docids) in new_faceted_docids {
let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
docids |= new_docids;
self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?;
}
Ok(())
}
}
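In terms of plain bitmaps, each decoded entry above boils down to "subtract the Deletion side from the stored docids, then union the Addition side". A minimal sketch with the public `roaring` crate (the helper name is illustrative):

```rust
use roaring::RoaringBitmap;

// Apply one DelAdd entry to the docids currently stored for a facet value.
fn apply_del_add(
    current: &mut RoaringBitmap,
    to_delete: Option<&RoaringBitmap>,
    to_add: Option<&RoaringBitmap>,
) {
    if let Some(del) = to_delete {
        *current -= del;
    }
    if let Some(add) = to_add {
        *current |= add;
    }
}

fn main() {
    let mut current: RoaringBitmap = (0u32..4).collect();
    let to_delete: RoaringBitmap = [1u32, 2].into_iter().collect();
    let to_add: RoaringBitmap = [10u32].into_iter().collect();
    apply_del_add(&mut current, Some(&to_delete), Some(&to_add));
    assert_eq!(current.iter().collect::<Vec<_>>(), vec![0, 3, 10]);
}
```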


@ -14,7 +14,7 @@ The databases must be able to return results for queries such as:
The algorithms that implement these queries are found in the `src/search/facet` folder.
To make these queries fast to compute, the database adopts a tree structure:
```ignore
```text
┌───────────────────────────────┬───────────────────────────────┬───────────────┐
┌───────┐ │ "ab" (2) │ "gaf" (2) │ "woz" (1) │
│Level 2│ │ │ │ │
@ -41,7 +41,7 @@ These documents all contain a facet value that is contained within `ab .. gaf`.
In the database, each node is represented by a key/value pair encoded as a [`FacetGroupKey`] and a
[`FacetGroupValue`], which have the following format:
```ignore
```text
FacetGroupKey:
- field id : u16
- level : u8
@ -98,7 +98,6 @@ use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
pub mod bulk;
pub mod delete;
pub mod incremental;
/// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
@ -109,7 +108,7 @@ pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
@ -118,7 +117,7 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
new_data: grenad::Reader<BufReader<File>>,
delta_data: grenad::Reader<BufReader<File>>,
) -> Self {
let database = match facet_type {
FacetType::String => index
@ -135,26 +134,26 @@ impl<'i> FacetsUpdate<'i> {
max_group_size: FACET_MAX_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
new_data,
delta_data,
}
}
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
if self.new_data.is_empty() {
if self.delta_data.is_empty() {
return Ok(());
}
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
if self.delta_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
self.facet_type,
self.new_data,
self.delta_data,
self.group_size,
self.min_level_size,
);
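The choice between the bulk and the incremental facet indexer is driven by the single ratio checked above and calibrated by the `comparison_bench` module mentioned in the comment: as soon as the delta holds at least one fiftieth of the database, the bulk rebuild is assumed to be cheaper. A toy sketch of that decision (the helper name and the numbers are made up for illustration):

```rust
// Mirror of the `delta_data.len() >= database.len() / 50` check above.
fn use_bulk_indexer(delta_len: u64, database_len: u64) -> bool {
    delta_len >= database_len / 50
}

fn main() {
    // With 100_000 facet entries in LMDB, a delta of 2_000 entries (or more)
    // triggers the bulk rebuild; anything smaller goes through the incremental path.
    assert!(use_bulk_indexer(2_000, 100_000));
    assert!(!use_bulk_indexer(1_999, 100_000));
}
```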
@ -163,7 +162,7 @@ impl<'i> FacetsUpdate<'i> {
let incremental_update = FacetsUpdateIncremental::new(
self.index,
self.facet_type,
self.new_data,
self.delta_data,
self.group_size,
self.min_level_size,
self.max_group_size,
@ -279,6 +278,7 @@ pub(crate) mod test_helpers {
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
@ -455,20 +455,22 @@ pub(crate) mod test_helpers {
let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
writer.insert(&key, &value).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
}
writer.finish().unwrap();
let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
let update = FacetsUpdateBulkInner {
db: self.content,
new_data: Some(reader),
delta_data: Some(reader),
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
};
update.update(wtxn, field_ids, |_, _, _| Ok(())).unwrap();
update.update(wtxn, field_ids).unwrap();
}
pub fn verify_structure_validity(&self, txn: &RoTxn, field_id: u16) {
@ -556,101 +558,6 @@ pub(crate) mod test_helpers {
}
}
#[cfg(test)]
mod tests {
use big_s::S;
use maplit::hashset;
use crate::db_snap;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::update::DeletionStrategy;
#[test]
fn replace_all_identical_soft_deletion_then_hard_deletion() {
let mut index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("size") });
})
.unwrap();
let mut documents = vec![];
for i in 0..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"size": i % 250,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "initial", @"777e0e221d778764b472c512617eeb3b");
db_snap!(index, number_faceted_documents_ids, "initial", @"bd916ef32b05fd5c3c4c518708f431a9");
db_snap!(index, soft_deleted_documents_ids, "initial", @"[]");
let mut documents = vec![];
for i in 0..999 {
documents.push(
serde_json::json! {
{
"id": i,
"size": i % 250,
"other": 0,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "replaced_1_soft", @"abba175d7bed727d0efadaef85a4388f");
db_snap!(index, number_faceted_documents_ids, "replaced_1_soft", @"de76488bd05ad94c6452d725acf1bd06");
db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123");
// Then replace the last document while disabling soft_deletion
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let mut documents = vec![];
for i in 999..1000 {
documents.push(
serde_json::json! {
{
"id": i,
"size": i % 250,
"other": 0,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "replaced_2_hard", @"029e27a46d09c574ae949aa4289b45e6");
db_snap!(index, number_faceted_documents_ids, "replaced_2_hard", @"60b19824f136affe6b240a7200779028");
db_snap!(index, soft_deleted_documents_ids, "replaced_2_hard", @"[]");
}
}
#[allow(unused)]
#[cfg(test)]
mod comparison_bench {


@ -1,20 +1,17 @@
use std::fmt;
use std::io::{BufWriter, Read, Seek};
use std::result::Result as StdResult;
use std::{fmt, iter};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader, EnrichedDocumentsBatchReader};
use crate::documents::{
DocumentIdExtractionError, DocumentsBatchIndex, DocumentsBatchReader,
EnrichedDocumentsBatchReader, PrimaryKey, DEFAULT_PRIMARY_KEY,
};
use crate::error::{GeoError, InternalError, UserError};
use crate::update::index_documents::{obkv_to_object, writer_into_reader};
use crate::{FieldId, Index, Object, Result};
/// The symbol used to define levels in a nested primary key.
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
/// The default primary that is used when not specified.
const DEFAULT_PRIMARY_KEY: &str = "id";
use crate::{FieldId, Index, Result};
/// This function validates and enriches the documents by checking that:
/// - we can infer a primary key,
@ -41,14 +38,12 @@ pub fn enrich_documents_batch<R: Read + Seek>(
// The primary key *field id* that has already been set for this index or the one
// we will guess by searching for the first key that contains "id" as a substring.
let primary_key = match index.primary_key(rtxn)? {
Some(primary_key) if primary_key.contains(PRIMARY_KEY_SPLIT_SYMBOL) => {
PrimaryKey::nested(primary_key)
}
Some(primary_key) => match documents_batch_index.id(primary_key) {
Some(id) => PrimaryKey::flat(primary_key, id),
None if autogenerate_docids => {
PrimaryKey::flat(primary_key, documents_batch_index.insert(primary_key))
}
Some(primary_key) => match PrimaryKey::new(primary_key, &documents_batch_index) {
Some(primary_key) => primary_key,
None if autogenerate_docids => PrimaryKey::Flat {
name: primary_key,
field_id: documents_batch_index.insert(primary_key),
},
None => {
return match cursor.next_document()? {
Some(first_document) => Ok(Err(UserError::MissingDocumentId {
@ -76,14 +71,14 @@ pub fn enrich_documents_batch<R: Read + Seek>(
});
match guesses.as_slice() {
[] if autogenerate_docids => PrimaryKey::flat(
DEFAULT_PRIMARY_KEY,
documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
),
[] if autogenerate_docids => PrimaryKey::Flat {
name: DEFAULT_PRIMARY_KEY,
field_id: documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
},
[] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
[(field_id, name)] => {
log::info!("Primary key was not specified in index. Inferred to '{name}'");
PrimaryKey::flat(name, *field_id)
PrimaryKey::Flat { name, field_id: *field_id }
}
multiple => {
return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound {
@ -156,92 +151,24 @@ fn fetch_or_generate_document_id(
uuid_buffer: &mut [u8; uuid::fmt::Hyphenated::LENGTH],
count: u32,
) -> Result<StdResult<DocumentId, UserError>> {
match primary_key {
PrimaryKey::Flat { name: primary_key, field_id: primary_key_id } => {
match document.get(primary_key_id) {
Some(document_id_bytes) => {
let document_id = serde_json::from_slice(document_id_bytes)
.map_err(InternalError::SerdeJson)?;
match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(DocumentId::retrieved(document_id))),
Err(user_error) => Ok(Err(user_error)),
}
}
None if autogenerate_docids => {
let uuid = uuid::Uuid::new_v4().as_hyphenated().encode_lower(uuid_buffer);
Ok(Ok(DocumentId::generated(uuid.to_string(), count)))
}
None => Ok(Err(UserError::MissingDocumentId {
primary_key: primary_key.to_string(),
document: obkv_to_object(document, documents_batch_index)?,
})),
}
Ok(match primary_key.document_id(document, documents_batch_index)? {
Ok(document_id) => Ok(DocumentId::Retrieved { value: document_id }),
Err(DocumentIdExtractionError::InvalidDocumentId(user_error)) => Err(user_error),
Err(DocumentIdExtractionError::MissingDocumentId) if autogenerate_docids => {
let uuid = uuid::Uuid::new_v4().as_hyphenated().encode_lower(uuid_buffer);
Ok(DocumentId::Generated { value: uuid.to_string(), document_nth: count })
}
nested @ PrimaryKey::Nested { .. } => {
let mut matching_documents_ids = Vec::new();
for (first_level_name, right) in nested.possible_level_names() {
if let Some(field_id) = documents_batch_index.id(first_level_name) {
if let Some(value_bytes) = document.get(field_id) {
let object = serde_json::from_slice(value_bytes)
.map_err(InternalError::SerdeJson)?;
fetch_matching_values(object, right, &mut matching_documents_ids);
if matching_documents_ids.len() >= 2 {
return Ok(Err(UserError::TooManyDocumentIds {
primary_key: nested.name().to_string(),
document: obkv_to_object(document, documents_batch_index)?,
}));
}
}
}
}
match matching_documents_ids.pop() {
Some(document_id) => match validate_document_id_value(document_id)? {
Ok(document_id) => Ok(Ok(DocumentId::retrieved(document_id))),
Err(user_error) => Ok(Err(user_error)),
},
None => Ok(Err(UserError::MissingDocumentId {
primary_key: nested.name().to_string(),
document: obkv_to_object(document, documents_batch_index)?,
})),
}
Err(DocumentIdExtractionError::MissingDocumentId) => Err(UserError::MissingDocumentId {
primary_key: primary_key.name().to_string(),
document: obkv_to_object(document, documents_batch_index)?,
}),
Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
Err(UserError::TooManyDocumentIds {
primary_key: primary_key.name().to_string(),
document: obkv_to_object(document, documents_batch_index)?,
})
}
}
}
/// A type that represent the type of primary key that has been set
/// for this index, a classic flat one or a nested one.
#[derive(Debug, Clone, Copy)]
enum PrimaryKey<'a> {
Flat { name: &'a str, field_id: FieldId },
Nested { name: &'a str },
}
impl PrimaryKey<'_> {
fn flat(name: &str, field_id: FieldId) -> PrimaryKey {
PrimaryKey::Flat { name, field_id }
}
fn nested(name: &str) -> PrimaryKey {
PrimaryKey::Nested { name }
}
fn name(&self) -> &str {
match self {
PrimaryKey::Flat { name, .. } => name,
PrimaryKey::Nested { name } => name,
}
}
/// Returns an `Iterator` that gives all the possible field names the primary key
/// can have, depending on the first-level name and the depth of the objects.
fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
let name = self.name();
name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
.map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
.chain(iter::once((name, "")))
}
})
}
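For reference, the nested-primary-key splitting that used to live in this file (and now presumably sits behind the shared `PrimaryKey` type imported above) is easy to picture with a standalone sketch of `possible_level_names`, adapted from the removed code:

```rust
/// The symbol used to define levels in a nested primary key (kept from the removed code).
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';

/// All the (first level name, remaining path) pairs a nested primary key can produce.
fn possible_level_names(name: &str) -> impl Iterator<Item = (&str, &str)> + '_ {
    name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
        .map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
        .chain(std::iter::once((name, "")))
}

fn main() {
    let pairs: Vec<_> = possible_level_names("person.id.number").collect();
    assert_eq!(
        pairs,
        vec![("person", "id.number"), ("person.id", "number"), ("person.id.number", "")]
    );
}
```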
/// A type that represents a document id that has been retrieved from a document or auto-generated.
@ -255,14 +182,6 @@ pub enum DocumentId {
}
impl DocumentId {
fn retrieved(value: String) -> DocumentId {
DocumentId::Retrieved { value }
}
fn generated(value: String, document_nth: u32) -> DocumentId {
DocumentId::Generated { value, document_nth }
}
fn debug(&self) -> String {
format!("{:?}", self)
}
@ -290,66 +209,6 @@ impl fmt::Debug for DocumentId {
}
}
fn starts_with(selector: &str, key: &str) -> bool {
selector.strip_prefix(key).map_or(false, |tail| {
tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
})
}
pub fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
match value {
Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
otherwise => output.push(otherwise),
}
}
pub fn fetch_matching_values_in_object(
object: Object,
selector: &str,
base_key: &str,
output: &mut Vec<Value>,
) {
for (key, value) in object {
let base_key = if base_key.is_empty() {
key.to_string()
} else {
format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
};
if starts_with(selector, &base_key) {
match value {
Value::Object(object) => {
fetch_matching_values_in_object(object, selector, &base_key, output)
}
value => output.push(value),
}
}
}
}
pub fn validate_document_id(document_id: &str) -> Option<&str> {
if !document_id.is_empty()
&& document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
{
Some(document_id)
} else {
None
}
}
/// Parses a Json encoded document id and validate it, returning a user error when it is one.
pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
match document_id {
Value::String(string) => match validate_document_id(&string) {
Some(s) if s.len() == string.len() => Ok(Ok(string)),
Some(s) => Ok(Ok(s.to_string())),
None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
},
Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
}
}
/// Try to extract an `f64` from a JSON `Value` and return the `Value`
/// in the `Err` variant if it failed.
pub fn extract_finite_float_from_value(value: Value) -> StdResult<f64, Value> {


@ -5,18 +5,16 @@ use std::io::BufReader;
use std::{io, mem, str};
use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use obkv::KvReader;
use obkv::{KvReader, KvWriterU16};
use roaring::RoaringBitmap;
use serde_json::Value;
use super::helpers::{concat_u32s_array, create_sorter, sorter_into_reader, GrenadParameters};
use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
use crate::error::{InternalError, SerializationError};
use crate::update::index_documents::MergeFn;
use crate::{
absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
};
use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
/// Extracts the words and the positions where they appear in the document, and
/// prefixes them with the document id.
@ -32,25 +30,162 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
allowed_separators: Option<&[&str]>,
dictionary: Option<&[&str]>,
max_positions_per_attributes: Option<u32>,
) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
puffin::profile_function!();
let max_positions_per_attributes = max_positions_per_attributes
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
let max_memory = indexer.max_memory_by_thread();
// initialize destination values.
let mut documents_ids = RoaringBitmap::new();
let mut script_language_docids = HashMap::new();
let mut docid_word_positions_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
concat_u32s_array,
keep_latest_obkv,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut buffers = Buffers::default();
// initialize buffers.
let mut del_buffers = Buffers::default();
let mut add_buffers = Buffers::default();
let mut key_buffer = Vec::new();
let mut value_buffer = Vec::new();
// initialize tokenizer.
let mut builder = tokenizer_builder(stop_words, allowed_separators, dictionary, None);
let tokenizer = builder.build();
// iterate over documents.
let mut cursor = obkv_documents.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
// if the searchable fields didn't change, skip the searchable indexing for this document.
if !searchable_fields_changed(&KvReader::<FieldId>::new(value), searchable_fields) {
continue;
}
documents_ids.push(document_id);
// Update key buffer prefix.
key_buffer.clear();
key_buffer.extend_from_slice(&document_id.to_be_bytes());
// Tokenize deletions and additions in 2 different threads.
let (del, add): (Result<_>, Result<_>) = rayon::join(
|| {
// deletions
lang_safe_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
stop_words,
allowed_separators,
dictionary,
max_positions_per_attributes,
DelAdd::Deletion,
&mut del_buffers,
)
},
|| {
// additions
lang_safe_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
stop_words,
allowed_separators,
dictionary,
max_positions_per_attributes,
DelAdd::Addition,
&mut add_buffers,
)
},
);
let (del_obkv, del_script_language_word_count) = del?;
let (add_obkv, add_script_language_word_count) = add?;
// merge deletions and additions.
// transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
value_buffer.clear();
del_add_from_two_obkvs(
KvReader::<FieldId>::new(del_obkv),
KvReader::<FieldId>::new(add_obkv),
&mut value_buffer,
)?;
// write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
let obkv = KvReader::<FieldId>::new(&value_buffer);
for (field_id, value) in obkv.iter() {
key_buffer.truncate(mem::size_of::<u32>());
key_buffer.extend_from_slice(&field_id.to_be_bytes());
docid_word_positions_sorter.insert(&key_buffer, value)?;
}
// update script_language_docids deletions.
for (script, languages_frequency) in del_script_language_word_count {
for (language, _) in languages_frequency {
let entry = script_language_docids
.entry((script, language))
.or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
entry.0.push(document_id);
}
}
// update script_language_docids additions.
for (script, languages_frequency) in add_script_language_word_count {
for (language, _) in languages_frequency {
let entry = script_language_docids
.entry((script, language))
.or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
entry.1.push(document_id);
}
}
}
// the returned sorter is serialized as: key: (DocId, FieldId), value: KV<DelAdd, KV<u16, String>>.
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (reader, script_language_docids))
}
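For readers unfamiliar with the key layout the sorter above relies on (big-endian document id followed by big-endian field id, built in a reused buffer), here is a minimal std-only sketch; the helper name is hypothetical and not part of the codebase.

// Minimal sketch of the (DocumentId, FieldId) key layout used above.
// `build_sorter_key` is a hypothetical helper name; only std is used.
fn build_sorter_key(key_buffer: &mut Vec<u8>, document_id: u32, field_id: u16) {
    key_buffer.clear();
    // the prefix is the document id, encoded in big-endian...
    key_buffer.extend_from_slice(&document_id.to_be_bytes());
    // ...followed by the field id, also big-endian, so entries sort by (docid, fid).
    key_buffer.extend_from_slice(&field_id.to_be_bytes());
}

fn main() {
    let mut buffer = Vec::new();
    build_sorter_key(&mut buffer, 42, 7);
    assert_eq!(buffer, vec![0, 0, 0, 42, 0, 7]);
}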
/// Check if any searchable fields of a document changed.
fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
) -> bool {
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
let del_add = KvReaderDelAdd::new(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
(None, None) => (),
// if both contains a value and values are the same, check the next field.
(Some(del), Some(add)) if del == add => (),
// otherwise the fields are different, return true.
_otherwise => return true,
}
}
}
false
}
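As a hedged illustration of the change-detection rule implemented by `searchable_fields_changed`, the sketch below reduces a field to an optional deletion value and an optional addition value; the types are simplified stand-ins for the DelAdd obkv readers, not the real API.

// Simplified stand-in for the DelAdd comparison above: a field is unchanged
// only when both sides are absent or both sides hold the same bytes.
fn field_changed(deletion: Option<&[u8]>, addition: Option<&[u8]>) -> bool {
    match (deletion, addition) {
        (None, None) => false,
        (Some(del), Some(add)) if del == add => false,
        _ => true,
    }
}

fn main() {
    assert!(!field_changed(None, None));
    assert!(!field_changed(Some("kitten".as_bytes()), Some("kitten".as_bytes())));
    assert!(field_changed(Some("kitten".as_bytes()), Some("kitty".as_bytes())));
    assert!(field_changed(None, Some("new value".as_bytes())));
}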
/// Factorize tokenizer building.
fn tokenizer_builder<'a>(
stop_words: Option<&'a fst::Set<&[u8]>>,
allowed_separators: Option<&'a [&str]>,
dictionary: Option<&'a [&str]>,
script_language: Option<&'a HashMap<Script, Vec<Language>>>,
) -> TokenizerBuilder<'a, &'a [u8]> {
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
@ -61,130 +196,147 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
if let Some(separators) = allowed_separators {
tokenizer_builder.separators(separators);
}
let tokenizer = tokenizer_builder.build();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let document_id = key
.try_into()
.map(u32::from_be_bytes)
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let obkv = KvReader::<FieldId>::new(value);
if let Some(script_language) = script_language {
tokenizer_builder.allow_list(script_language);
}
documents_ids.push(document_id);
buffers.key_buffer.clear();
buffers.key_buffer.extend_from_slice(&document_id.to_be_bytes());
tokenizer_builder
}
let mut script_language_word_count = HashMap::new();
/// Extracts the words of a document mapped with their positions,
/// ensuring no language detection mistake was made.
#[allow(clippy::too_many_arguments)] // FIXME: consider grouping arguments in a struct
fn lang_safe_tokens_from_document<'a>(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
tokenizer: &Tokenizer,
stop_words: Option<&fst::Set<&[u8]>>,
allowed_separators: Option<&[&str]>,
dictionary: Option<&[&str]>,
max_positions_per_attributes: u32,
del_add: DelAdd,
buffers: &'a mut Buffers,
) -> Result<(&'a [u8], HashMap<Script, Vec<(Language, usize)>>)> {
let mut script_language_word_count = HashMap::new();
extract_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
&mut buffers,
&mut script_language_word_count,
&mut docid_word_positions_sorter,
)?;
tokens_from_document(
obkv,
searchable_fields,
tokenizer,
max_positions_per_attributes,
del_add,
buffers,
&mut script_language_word_count,
)?;
// if we detect a potential mistake in the language detection,
// we rerun the extraction forcing the tokenizer to detect the most frequently detected Languages.
// context: https://github.com/meilisearch/meilisearch/issues/3565
if script_language_word_count
.values()
.map(Vec::as_slice)
.any(potential_language_detection_error)
{
// build an allow list with the most frequent detected languages in the document.
let script_language: HashMap<_, _> =
script_language_word_count.iter().filter_map(most_frequent_languages).collect();
// if we detect a potential mistake in the language detection,
// we rerun the extraction forcing the tokenizer to detect the most frequently detected Languages.
// context: https://github.com/meilisearch/meilisearch/issues/3565
if script_language_word_count
.values()
.map(Vec::as_slice)
.any(potential_language_detection_error)
{
// build an allow list with the most frequent detected languages in the document.
let script_language: HashMap<_, _> =
script_language_word_count.iter().filter_map(most_frequent_languages).collect();
// if the allow list is empty, meaning that no Language is considered frequent,
// then we don't rerun the extraction.
if !script_language.is_empty() {
// build a new temporary tokenizer including the allow list.
let mut tokenizer_builder = TokenizerBuilder::new();
if let Some(stop_words) = stop_words {
tokenizer_builder.stop_words(stop_words);
}
tokenizer_builder.allow_list(&script_language);
let tokenizer = tokenizer_builder.build();
// if the allow list is empty, meaning that no Language is considered frequent,
// then we don't rerun the extraction.
if !script_language.is_empty() {
// build a new temporary tokenizer including the allow list.
let mut builder = tokenizer_builder(
stop_words,
allowed_separators,
dictionary,
Some(&script_language),
);
let tokenizer = builder.build();
script_language_word_count.clear();
script_language_word_count.clear();
// rerun the extraction.
extract_tokens_from_document(
&obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
&mut buffers,
&mut script_language_word_count,
&mut docid_word_positions_sorter,
)?;
}
}
for (script, languages_frequency) in script_language_word_count {
for (language, _) in languages_frequency {
let entry = script_language_docids
.entry((script, language))
.or_insert_with(RoaringBitmap::new);
entry.push(document_id);
}
// rerun the extraction.
tokens_from_document(
obkv,
searchable_fields,
&tokenizer,
max_positions_per_attributes,
del_add,
buffers,
&mut script_language_word_count,
)?;
}
}
sorter_into_reader(docid_word_positions_sorter, indexer)
.map(|reader| (documents_ids, reader, script_language_docids))
// returns a (KV<FieldId, KV<u16, String>>, HashMap<Script, Vec<(Language, usize)>>)
Ok((&buffers.obkv_buffer, script_language_word_count))
}
fn extract_tokens_from_document(
/// Extracts the words of a document mapped with their positions.
fn tokens_from_document<'a>(
obkv: &KvReader<FieldId>,
searchable_fields: &Option<HashSet<FieldId>>,
tokenizer: &Tokenizer,
max_positions_per_attributes: u32,
buffers: &mut Buffers,
del_add: DelAdd,
buffers: &'a mut Buffers,
script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
docid_word_positions_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
) -> Result<&'a [u8]> {
buffers.obkv_buffer.clear();
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
for (field_id, field_bytes) in obkv.iter() {
// if field is searchable.
if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
let value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
let tokens = process_tokens(tokenizer.tokenize(field))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
// parse json.
let value =
serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
for (index, token) in tokens {
// if a language has been detected for the token, we update the counter.
if let Some(language) = token.language {
let script = token.script;
let entry =
script_language_word_count.entry(script).or_insert_with(Vec::new);
match entry.iter_mut().find(|(l, _)| *l == language) {
Some((_, n)) => *n += 1,
None => entry.push((language, 1)),
// prepare writing destination.
buffers.obkv_positions_buffer.clear();
let mut writer = KvWriterU16::new(&mut buffers.obkv_positions_buffer);
// convert the JSON value into a single string.
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
// create an iterator of tokens with their positions.
let tokens = process_tokens(tokenizer.tokenize(field))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
for (index, token) in tokens {
// if a language has been detected for the token, we update the counter.
if let Some(language) = token.language {
let script = token.script;
let entry =
script_language_word_count.entry(script).or_insert_with(Vec::new);
match entry.iter_mut().find(|(l, _)| *l == language) {
Some((_, n)) => *n += 1,
None => entry.push((language, 1)),
}
}
// keep a word only if it is not empty and fits in an LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
let position: u16 = index
.try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
writer.insert(position, token.as_bytes())?;
}
}
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
buffers.key_buffer.truncate(mem::size_of::<u32>());
buffers.key_buffer.extend_from_slice(token.as_bytes());
let position: u16 = index
.try_into()
.map_err(|_| SerializationError::InvalidNumberSerialization)?;
let position = absolute_from_relative_position(field_id, position);
docid_word_positions_sorter
.insert(&buffers.key_buffer, position.to_ne_bytes())?;
}
// write positions into document.
let positions = writer.into_inner()?;
document_writer.insert(field_id, positions)?;
}
}
}
}
Ok(())
// returns a KV<FieldId, KV<u16, String>>
Ok(document_writer.into_inner().map(|v| v.as_slice())?)
}
/// Transform a JSON value into a string that can be indexed.
@ -287,10 +439,10 @@ fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)
#[derive(Default)]
struct Buffers {
// the key buffer is the concatenation of the internal document id with the field id.
// The buffer has to be completely cleared between documents,
// and the field id part must be cleared between each field.
key_buffer: Vec<u8>,
// the field buffer used for each field's deserialization; it must be cleared between each field.
field_buffer: String,
// buffer used to store the value data containing an obkv.
obkv_buffer: Vec<u8>,
// buffer used to store the value data containing an obkv of tokens with their positions.
obkv_positions_buffer: Vec<u8>,
}

View File

@ -4,11 +4,12 @@ use std::io::{self, BufReader};
use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
};
use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd};
use crate::Result;
/// Extracts the facet number and the document ids where this facet number appears.
@ -17,7 +18,7 @@ use crate::Result;
/// documents ids from the given chunk of docid facet number positions.
#[logging_timer::time]
pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
docid_fid_facet_number: grenad::Reader<R>,
fid_docid_facet_number: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
@ -26,21 +27,30 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut cursor = docid_fid_facet_number.into_cursor()?;
while let Some((key_bytes, _)) = cursor.move_on_next()? {
let mut buffer = Vec::new();
let mut cursor = fid_docid_facet_number.into_cursor()?;
while let Some((key_bytes, deladd_obkv_bytes)) = cursor.move_on_next()? {
let (field_id, document_id, number) =
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
let key = FacetGroupKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
}
obkv.finish()?;
facet_number_docids_sorter.insert(key_bytes, &buffer)?;
}
sorter_into_reader(facet_number_docids_sorter, indexer)

View File

@ -1,13 +1,15 @@
use std::fs::File;
use std::io::{self, BufReader};
use std::io::BufReader;
use std::{io, str};
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::StrRefCodec;
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::merge_deladd_cbo_roaring_bitmaps;
use crate::{FieldId, Result};
/// Extracts the facet string and the document ids where this facet string appears.
///
@ -24,15 +26,16 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut buffer = Vec::new();
let mut cursor = docid_fid_facet_string.into_cursor()?;
while let Some((key, _original_value_bytes)) = cursor.move_on_next()? {
while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
@ -40,21 +43,17 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
try_split_array_at::<_, 4>(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
let mut normalised_value = std::str::from_utf8(normalized_value_bytes)?;
let normalised_truncated_value: String;
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
normalised_truncated_value = normalised_value
.char_indices()
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
normalised_value = normalised_truncated_value.as_str();
}
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
let normalized_value = str::from_utf8(normalized_value_bytes)?;
let key = FacetGroupKey { field_id, level: 0, left_bound: normalized_value };
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
// document id is encoded in native-endian because of the CBO roaring bitmap codec
facet_string_docids_sorter.insert(&key_bytes, document_id.to_ne_bytes())?;
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
for (deladd_key, _) in KvReaderDelAdd::new(deladd_original_value_bytes).iter() {
obkv.insert(deladd_key, document_id.to_ne_bytes())?;
}
obkv.finish()?;
facet_string_docids_sorter.insert(&key_bytes, &buffer)?;
}
sorter_into_reader(facet_string_docids_sorter, indexer)

View File

@ -1,24 +1,36 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::io::{self, BufReader};
use std::mem::size_of;
use std::result::Result as StdResult;
use grenad::Sorter;
use heed::zerocopy::AsBytes;
use heed::BytesEncode;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
use roaring::RoaringBitmap;
use serde_json::{from_slice, Value};
use FilterableValues::{Empty, Null, Values};
use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
use crate::error::InternalError;
use crate::facet::value_encoding::f64_into_bytes;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH};
use crate::{
CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
/// The extracted facet values stored in grenad files by type.
pub struct ExtractedFacetValues {
pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub fid_docid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
pub fid_docid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
@ -58,71 +70,150 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
max_memory.map(|m| m / 2),
);
let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
// The tuples represent the Del and Add sides of a bitmap
let mut facet_exists_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
let mut facet_is_null_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
let mut facet_is_empty_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
// We create two buffers for mutable ref issues with closures.
let mut numbers_key_buffer = Vec::new();
let mut strings_key_buffer = Vec::new();
let mut key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
for (field_id, field_bytes) in obkv.iter() {
if faceted_fields.contains(&field_id) {
key_buffer.clear();
numbers_key_buffer.clear();
strings_key_buffer.clear();
// Set key to the field_id
// Note: this encoding is consistent with FieldIdCodec
key_buffer.extend_from_slice(&field_id.to_be_bytes());
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
// Here, we know already that the document must be added to the “field id exists” database
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get();
facet_exists_docids.entry(field_id).or_default().insert(document);
// For the other extraction tasks, prefix the key with the field_id and the document_id
key_buffer.extend_from_slice(docid_bytes);
numbers_key_buffer.extend_from_slice(docid_bytes);
strings_key_buffer.extend_from_slice(docid_bytes);
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
let del_add_obkv = obkv::KvReader::new(field_bytes);
let del_value = match del_add_obkv.get(DelAdd::Deletion) {
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};
let add_value = match del_add_obkv.get(DelAdd::Addition) {
Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
None => None,
};
match extract_facet_values(
&value,
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
) {
FilterableValues::Null => {
facet_is_null_docids.entry(field_id).or_default().insert(document);
}
FilterableValues::Empty => {
facet_is_empty_docids.entry(field_id).or_default().insert(document);
}
FilterableValues::Values { numbers, strings } => {
// insert facet numbers in sorter
for number in numbers {
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
// We insert the document id on the Del and the Add side if the field exists.
let (ref mut del_exists, ref mut add_exists) =
facet_exists_docids.entry(field_id).or_default();
let (ref mut del_is_null, ref mut add_is_null) =
facet_is_null_docids.entry(field_id).or_default();
let (ref mut del_is_empty, ref mut add_is_empty) =
facet_is_empty_docids.entry(field_id).or_default();
fid_docid_facet_numbers_sorter
.insert(&key_buffer, ().as_bytes())?;
}
if del_value.is_some() {
del_exists.insert(document);
}
if add_value.is_some() {
add_exists.insert(document);
}
let geo_support =
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
let del_filterable_values =
del_value.map(|value| extract_facet_values(&value, geo_support));
let add_filterable_values =
add_value.map(|value| extract_facet_values(&value, geo_support));
// Those closures are just here to simplify things a bit.
let mut insert_numbers_diff = |del_numbers, add_numbers| {
insert_numbers_diff(
&mut fid_docid_facet_numbers_sorter,
&mut numbers_key_buffer,
del_numbers,
add_numbers,
)
};
let mut insert_strings_diff = |del_strings, add_strings| {
insert_strings_diff(
&mut fid_docid_facet_strings_sorter,
&mut strings_key_buffer,
del_strings,
add_strings,
)
};
match (del_filterable_values, add_filterable_values) {
(None, None) => (),
(Some(del_filterable_values), None) => match del_filterable_values {
Null => {
del_is_null.insert(document);
}
// insert normalized and original facet string in sorter
for (normalized, original) in
strings.into_iter().filter(|(n, _)| !n.is_empty())
{
let normalized_truncated_value: String = normalized
.char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect();
key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
key_buffer.extend_from_slice(normalized_truncated_value.as_bytes());
fid_docid_facet_strings_sorter
.insert(&key_buffer, original.as_bytes())?;
Empty => {
del_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
},
(None, Some(add_filterable_values)) => match add_filterable_values {
Null => {
add_is_null.insert(document);
}
Empty => {
add_is_empty.insert(document);
}
Values { numbers, strings } => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
}
},
(Some(del_filterable_values), Some(add_filterable_values)) => {
match (del_filterable_values, add_filterable_values) {
(Null, Null) | (Empty, Empty) => (),
(Null, Empty) => {
del_is_null.insert(document);
add_is_empty.insert(document);
}
(Empty, Null) => {
del_is_empty.insert(document);
add_is_null.insert(document);
}
(Null, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_null.insert(document);
}
(Empty, Values { numbers, strings }) => {
insert_numbers_diff(vec![], numbers)?;
insert_strings_diff(vec![], strings)?;
del_is_empty.insert(document);
}
(Values { numbers, strings }, Null) => {
add_is_null.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(Values { numbers, strings }, Empty) => {
add_is_empty.insert(document);
insert_numbers_diff(numbers, vec![])?;
insert_strings_diff(strings, vec![])?;
}
(
Values { numbers: del_numbers, strings: del_strings },
Values { numbers: add_numbers, strings: add_strings },
) => {
insert_numbers_diff(del_numbers, add_numbers)?;
insert_strings_diff(del_strings, add_strings)?;
}
}
}
}
@ -130,14 +221,15 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
}
}
let mut buffer = Vec::new();
let mut facet_exists_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, bitmap) in facet_exists_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_exists_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
for (fid, (del_bitmap, add_bitmap)) in facet_exists_docids.into_iter() {
deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
facet_exists_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
}
let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;
@ -146,9 +238,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, bitmap) in facet_is_null_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_is_null_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
for (fid, (del_bitmap, add_bitmap)) in facet_is_null_docids.into_iter() {
deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
facet_is_null_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
}
let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;
@ -157,21 +249,156 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
for (fid, bitmap) in facet_is_empty_docids.into_iter() {
let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
for (fid, (del_bitmap, add_bitmap)) in facet_is_empty_docids.into_iter() {
deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
}
let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;
Ok(ExtractedFacetValues {
docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
fid_docid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
fid_docid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
fid_facet_exists_docids_chunk: facet_exists_docids_reader,
})
}
/// Generates a vector of bytes containing a DelAdd obkv with two bitmaps.
fn deladd_obkv_cbo_roaring_bitmaps(
buffer: &mut Vec<u8>,
del_bitmap: &RoaringBitmap,
add_bitmap: &RoaringBitmap,
) -> io::Result<()> {
buffer.clear();
let mut obkv = KvWriterDelAdd::new(buffer);
let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
obkv.finish()
}
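To make the shape of these DelAdd values concrete, here is a hedged, std-only sketch of packing a deletion payload and an addition payload into a single value. The length-prefixed layout below is purely illustrative; the real code uses the KvWriterDelAdd obkv format together with the CBO roaring bitmap codec.

// Illustrative only: pack a Del payload and an Add payload into one value by
// length-prefixing each side. The real format is an obkv keyed by DelAdd.
fn pack_del_add(buffer: &mut Vec<u8>, del: &[u8], add: &[u8]) {
    buffer.clear();
    buffer.extend_from_slice(&(del.len() as u32).to_be_bytes());
    buffer.extend_from_slice(del);
    buffer.extend_from_slice(&(add.len() as u32).to_be_bytes());
    buffer.extend_from_slice(add);
}

fn unpack_del_add(value: &[u8]) -> (&[u8], &[u8]) {
    let del_len = u32::from_be_bytes([value[0], value[1], value[2], value[3]]) as usize;
    let (del, rest) = value[4..].split_at(del_len);
    let add_len = u32::from_be_bytes([rest[0], rest[1], rest[2], rest[3]]) as usize;
    (del, &rest[4..4 + add_len])
}

fn main() {
    let mut buffer = Vec::new();
    pack_del_add(&mut buffer, b"old docids", b"new docids");
    let (del, add) = unpack_del_add(&buffer);
    assert_eq!(del, b"old docids");
    assert_eq!(add, b"new docids");
}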
/// Truncates a string to the biggest valid LMDB key size.
fn truncate_string(s: String) -> String {
s.char_indices()
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
.map(|(_, c)| c)
.collect()
}
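A self-contained variant of this char-boundary truncation is sketched below; `MAX_BYTES` is a hypothetical stand-in for MAX_FACET_VALUE_LENGTH, and it uses the exact encoded length of each character where the code above conservatively reserves 4 bytes per character.

// Keep as many whole characters as fit in `max_bytes`, never splitting a
// UTF-8 code point. `MAX_BYTES` is a hypothetical stand-in for the real limit.
const MAX_BYTES: usize = 8;

fn truncate_to_bytes(s: &str, max_bytes: usize) -> String {
    s.char_indices()
        .take_while(|(idx, c)| idx + c.len_utf8() <= max_bytes)
        .map(|(_, c)| c)
        .collect()
}

fn main() {
    // "héllo wörld" contains multi-byte characters; truncation stays on boundaries.
    let truncated = truncate_to_bytes("héllo wörld", MAX_BYTES);
    assert!(truncated.len() <= MAX_BYTES);
    assert_eq!(truncated, "héllo w");
}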
/// Computes the diff between both Del and Add numbers and
/// only inserts the parts that differ in the sorter.
fn insert_numbers_diff<MF>(
fid_docid_facet_numbers_sorter: &mut Sorter<MF>,
key_buffer: &mut Vec<u8>,
mut del_numbers: Vec<f64>,
mut add_numbers: Vec<f64>,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
// We sort and dedup the float numbers
del_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
add_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
del_numbers.dedup_by_key(|f| OrderedFloat(*f));
add_numbers.dedup_by_key(|f| OrderedFloat(*f));
let merged_numbers_iter = itertools::merge_join_by(
del_numbers.into_iter().map(OrderedFloat),
add_numbers.into_iter().map(OrderedFloat),
|del, add| del.cmp(add),
);
// insert facet numbers in sorter
for eob in merged_numbers_iter {
key_buffer.truncate(TRUNCATE_SIZE);
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left(OrderedFloat(number)) => {
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, ().as_bytes())?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}
EitherOrBoth::Right(OrderedFloat(number)) => {
if let Some(value_bytes) = f64_into_bytes(number) {
key_buffer.extend_from_slice(&value_bytes);
key_buffer.extend_from_slice(&number.to_be_bytes());
// We insert only the Add part of the Obkv to inform
// that we only want to add all those numbers.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, ().as_bytes())?;
let bytes = obkv.into_inner()?;
fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
}
}
}
}
Ok(())
}
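The merge-join above only works because both sides are sorted and deduplicated first; the hedged, std-only sketch below shows the same three-way classification (delete-only, add-only, unchanged) with plain integers instead of OrderedFloat and a sorter.

// Classify two sorted, deduplicated sequences into values present only on the
// Del side, only on the Add side, or on both (in which case nothing is written).
fn diff_sorted(del: &[u64], add: &[u64]) -> (Vec<u64>, Vec<u64>) {
    let (mut only_del, mut only_add) = (Vec::new(), Vec::new());
    let (mut i, mut j) = (0, 0);
    while i < del.len() && j < add.len() {
        match del[i].cmp(&add[j]) {
            std::cmp::Ordering::Less => { only_del.push(del[i]); i += 1; }
            std::cmp::Ordering::Greater => { only_add.push(add[j]); j += 1; }
            std::cmp::Ordering::Equal => { i += 1; j += 1; } // unchanged: skip
        }
    }
    only_del.extend_from_slice(&del[i..]);
    only_add.extend_from_slice(&add[j..]);
    (only_del, only_add)
}

fn main() {
    let (del, add) = diff_sorted(&[1, 2, 3, 5], &[2, 3, 4, 6]);
    assert_eq!(del, vec![1, 5]); // would get a Deletion entry in the sorter
    assert_eq!(add, vec![4, 6]); // would get an Addition entry in the sorter
}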
/// Computes the diff between both Del and Add strings and
/// only inserts the parts that differ in the sorter.
fn insert_strings_diff<MF>(
fid_docid_facet_strings_sorter: &mut Sorter<MF>,
key_buffer: &mut Vec<u8>,
mut del_strings: Vec<(String, String)>,
mut add_strings: Vec<(String, String)>,
) -> Result<()>
where
MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
{
// We sort and dedup the normalized and original strings
del_strings.sort_unstable();
add_strings.sort_unstable();
del_strings.dedup();
add_strings.dedup();
let merged_strings_iter = itertools::merge_join_by(
del_strings.into_iter().filter(|(n, _)| !n.is_empty()),
add_strings.into_iter().filter(|(n, _)| !n.is_empty()),
|del, add| del.cmp(add),
);
// insert normalized and original facet string in sorter
for eob in merged_strings_iter {
key_buffer.truncate(TRUNCATE_SIZE);
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left((normalized, original)) => {
let truncated = truncate_string(normalized);
key_buffer.extend_from_slice(truncated.as_bytes());
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, original)?;
let bytes = obkv.into_inner()?;
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
EitherOrBoth::Right((normalized, original)) => {
let truncated = truncate_string(normalized);
key_buffer.extend_from_slice(truncated.as_bytes());
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, original)?;
let bytes = obkv.into_inner()?;
fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
}
}
}
Ok(())
}
/// Represent what a document field contains.
enum FilterableValues {
/// Corresponds to the JSON `null` value.
@ -182,6 +409,7 @@ enum FilterableValues {
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
}
/// Extracts the facet values of a JSON field.
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
fn inner_extract_facet_values(
value: &Value,

View File

@ -1,16 +1,18 @@
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, BufReader};
use grenad::Sorter;
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters, MergeFn,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::Result;
const MAX_COUNTED_WORDS: usize = 30;
/// Extracts the field id word count and the document ids where
/// this field id with this amount of words appears.
@ -28,70 +30,62 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
// This map is assumed to not consume a lot of memory.
let mut document_fid_wordcount = HashMap::new();
let mut current_document_id = None;
let mut key_buffer = Vec::new();
let mut value_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, _word_bytes) = try_split_array_at(key)
let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let curr_document_id = *current_document_id.get_or_insert(document_id);
if curr_document_id != document_id {
drain_document_fid_wordcount_into_sorter(
&mut fid_word_count_docids_sorter,
&mut document_fid_wordcount,
curr_document_id,
)?;
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(value);
let deletion = del_add_reader
// get deleted words
.get(DelAdd::Deletion)
// count deleted words
.map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count())
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
let addition = del_add_reader
// get added words
.get(DelAdd::Addition)
// count added words
.map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count())
// keep the count if under or equal to MAX_COUNTED_WORDS
.filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
if deletion != addition {
// Insert the deleted word count in the sorter if it exists.
if let Some(word_count) = deletion {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
key_buffer.clear();
key_buffer.extend_from_slice(fid_bytes);
key_buffer.push(word_count as u8);
fid_word_count_docids_sorter
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
// Insert the added word count in the sorter if it exists.
if let Some(word_count) = addition {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key_buffer.clear();
key_buffer.extend_from_slice(fid_bytes);
key_buffer.push(word_count as u8);
fid_word_count_docids_sorter
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
}
for position in read_u32_ne_bytes(value) {
let (field_id, _) = relative_from_absolute_position(position);
let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
*value += 1;
}
}
if let Some(document_id) = current_document_id {
// We must make sure that we don't lose the current document's field id
// word count map if we break because we reached the end of the chunk.
drain_document_fid_wordcount_into_sorter(
&mut fid_word_count_docids_sorter,
&mut document_fid_wordcount,
document_id,
)?;
}
sorter_into_reader(fid_word_count_docids_sorter, indexer)
}
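The word counting above is deliberately bounded: it looks at no more than MAX_COUNTED_WORDS + 1 entries and drops counts above the cap, so very long fields are never fully iterated. A minimal std-only sketch of that idea, with a plain whitespace split standing in for the tokenized obkv:

// Count at most `max + 1` words and keep the count only when it is <= `max`.
fn bounded_word_count(text: &str, max: usize) -> Option<usize> {
    let count = text.split_whitespace().take(max + 1).count();
    (count <= max).then_some(count)
}

fn main() {
    let long_field = "word ".repeat(100);
    assert_eq!(bounded_word_count("the quick brown fox", 30), Some(4));
    // above the cap, no (fid, word count) entry is produced at all.
    assert_eq!(bounded_word_count(&long_field, 30), None);
}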
fn drain_document_fid_wordcount_into_sorter(
fid_word_count_docids_sorter: &mut Sorter<MergeFn>,
document_fid_wordcount: &mut HashMap<FieldId, u32>,
document_id: DocumentId,
) -> Result<()> {
let mut key_buffer = Vec::new();
for (fid, count) in document_fid_wordcount.drain() {
if count <= 30 {
key_buffer.clear();
key_buffer.extend_from_slice(&fid.to_be_bytes());
key_buffer.push(count as u8);
fid_word_count_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
}
Ok(())
}

View File

@ -6,6 +6,7 @@ use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::GeoError;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::extract_finite_float_from_value;
use crate::{FieldId, InternalError, Result};
@ -30,39 +31,71 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::new(value);
// since we only needs the primary key when we throw an error we create this getter to
// lazily get it when needed
// since we only need the primary key when we throw an error
// we create this getter to lazily get it when needed
let document_id = || -> Value {
let document_id = obkv.get(primary_key_id).unwrap();
serde_json::from_slice(document_id).unwrap()
};
// first we get the two fields
let lat = obkv.get(lat_fid);
let lng = obkv.get(lng_fid);
match (obkv.get(lat_fid), obkv.get(lng_fid)) {
(Some(lat), Some(lng)) => {
let deladd_lat_obkv = KvReaderDelAdd::new(lat);
let deladd_lng_obkv = KvReaderDelAdd::new(lng);
if let Some((lat, lng)) = lat.zip(lng) {
// then we extract the values
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
// then we extract the values
let del_lat_lng = deladd_lat_obkv
.get(DelAdd::Deletion)
.zip(deladd_lng_obkv.get(DelAdd::Deletion))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let add_lat_lng = deladd_lat_obkv
.get(DelAdd::Addition)
.zip(deladd_lng_obkv.get(DelAdd::Addition))
.map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
.transpose()?;
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
writer.insert(docid_bytes, bytes)?;
} else if lat.is_none() && lng.is_some() {
return Err(GeoError::MissingLatitude { document_id: document_id() })?;
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
if del_lat_lng != add_lat_lng {
let mut obkv = KvWriterDelAdd::memory();
if let Some([lat, lng]) = del_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Deletion, bytes)?;
}
if let Some([lat, lng]) = add_lat_lng {
#[allow(clippy::drop_non_drop)]
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
obkv.insert(DelAdd::Addition, bytes)?;
}
let bytes = obkv.into_inner()?;
writer.insert(docid_bytes, bytes)?;
}
}
(None, Some(_)) => {
return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
}
(Some(_), None) => {
return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
}
(None, None) => (),
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)
}
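The 16-byte values written above are just the two coordinates laid out back to back in native-endian order; a std-only sketch of that packing, without the `concat_arrays!` macro, is shown below.

// Pack a (lat, lng) pair into the same 16-byte native-endian layout as above.
fn pack_lat_lng(lat: f64, lng: f64) -> [u8; 16] {
    let mut bytes = [0u8; 16];
    bytes[..8].copy_from_slice(&lat.to_ne_bytes());
    bytes[8..].copy_from_slice(&lng.to_ne_bytes());
    bytes
}

fn main() {
    let bytes = pack_lat_lng(48.8566, 2.3522);
    let mut lat_bytes = [0u8; 8];
    lat_bytes.copy_from_slice(&bytes[..8]);
    assert_eq!(f64::from_ne_bytes(lat_bytes), 48.8566);
}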
/// Extracts the finite floats lat and lng from two byte slices.
fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
let lat = extract_finite_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
let lng = extract_finite_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
Ok([lat, lng])
}
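For intuition, here is a hedged sketch of the kind of check `extract_finite_float_from_value` performs on each coordinate: parse the JSON bytes and accept only a finite f64. The function name and the plain String error type are simplifications, not the real API.

use serde_json::Value;

// Parse a JSON byte slice and accept it only if it is a finite f64.
fn parse_finite_float(bytes: &[u8]) -> Result<f64, String> {
    let value: Value = serde_json::from_slice(bytes).map_err(|e| e.to_string())?;
    match value.as_f64() {
        Some(float) if float.is_finite() => Ok(float),
        _ => Err(format!("not a finite number: {}", value)),
    }
}

fn main() {
    assert_eq!(parse_finite_float(b"12.5"), Ok(12.5));
    assert!(parse_finite_float(b"\"north\"").is_err());
}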

View File

@ -1,13 +1,24 @@
use std::cmp::Ordering;
use std::convert::TryFrom;
use std::fs::File;
use std::io::{self, BufReader};
use std::io::{self, BufReader, BufWriter};
use std::mem::size_of;
use std::str::from_utf8;
use bytemuck::cast_slice;
use grenad::Writer;
use itertools::EitherOrBoth;
use ordered_float::OrderedFloat;
use serde_json::{from_slice, Value};
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::UserError;
use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::try_split_at;
use crate::{DocumentId, FieldId, InternalError, Result, VectorOrArrayOfVectors};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
/// Extracts the embedding vector contained in each document under the `_vectors` field.
///
@ -16,7 +27,6 @@ use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
pub fn extract_vector_points<R: io::Read + io::Seek>(
obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
primary_key_id: FieldId,
vectors_fid: FieldId,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
@ -27,43 +37,112 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
tempfile::tempfile()?,
);
let mut key_buffer = Vec::new();
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
while let Some((key, value)) = cursor.move_on_next()? {
// this must always be serialized as (docid, external_docid);
let (docid_bytes, external_id_bytes) =
try_split_at(key, std::mem::size_of::<DocumentId>()).unwrap();
debug_assert!(from_utf8(external_id_bytes).is_ok());
let obkv = obkv::KvReader::new(value);
key_buffer.clear();
key_buffer.extend_from_slice(docid_bytes);
// since we only needs the primary key when we throw an error we create this getter to
// lazily get it when needed
let document_id = || -> Value {
let document_id = obkv.get(primary_key_id).unwrap();
from_slice(document_id).unwrap()
};
let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };
// first we retrieve the _vectors field
if let Some(vectors) = obkv.get(vectors_fid) {
// extract the vectors
let vectors = match from_slice(vectors) {
Ok(vectors) => VectorOrArrayOfVectors::into_array_of_vectors(vectors),
Err(_) => {
return Err(UserError::InvalidVectorsType {
document_id: document_id(),
value: from_slice(vectors).map_err(InternalError::SerdeJson)?,
}
.into())
}
};
if let Some(value) = obkv.get(vectors_fid) {
let vectors_obkv = KvReaderDelAdd::new(value);
if let Some(vectors) = vectors {
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
let index = u16::try_from(i).unwrap();
let mut key = docid_bytes.to_vec();
key.extend_from_slice(&index.to_be_bytes());
let bytes = cast_slice(&vector);
writer.insert(key, bytes)?;
}
}
// then we extract the values
let del_vectors = vectors_obkv
.get(DelAdd::Deletion)
.map(|vectors| extract_vectors(vectors, document_id))
.transpose()?
.flatten();
let add_vectors = vectors_obkv
.get(DelAdd::Addition)
.map(|vectors| extract_vectors(vectors, document_id))
.transpose()?
.flatten();
// and we finally push the unique vectors into the writer
push_vectors_diff(
&mut writer,
&mut key_buffer,
del_vectors.unwrap_or_default(),
add_vectors.unwrap_or_default(),
)?;
}
// else => the `_vectors` object was `null`, there is nothing to do
}
writer_into_reader(writer)
}
/// Computes the diff between both Del and Add vectors and
/// only writes the parts that differ into the writer.
fn push_vectors_diff(
writer: &mut Writer<BufWriter<File>>,
key_buffer: &mut Vec<u8>,
mut del_vectors: Vec<Vec<f32>>,
mut add_vectors: Vec<Vec<f32>>,
) -> Result<()> {
// We sort and dedup the vectors
del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
let merged_vectors_iter =
itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
// insert vectors into the writer
for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) {
// Generate the key by appending the unique index to it.
key_buffer.truncate(TRUNCATE_SIZE);
let index = u16::try_from(i).unwrap();
key_buffer.extend_from_slice(&index.to_be_bytes());
match eob {
EitherOrBoth::Both(_, _) => (), // no need to touch anything
EitherOrBoth::Left(vector) => {
// We insert only the Del part of the Obkv to inform
// that we only want to remove all those vectors.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
let bytes = obkv.into_inner()?;
writer.insert(&key_buffer, bytes)?;
}
EitherOrBoth::Right(vector) => {
// We insert only the Add part of the Obkv to inform
// that we only want to add all those vectors.
let mut obkv = KvWriterDelAdd::memory();
obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
let bytes = obkv.into_inner()?;
writer.insert(&key_buffer, bytes)?;
}
}
}
Ok(())
}
/// Compares two vectors by using the OrderedFloat helper.
fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
}
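A std-only variant of this comparison is sketched below, using `f32::total_cmp` instead of the OrderedFloat wrapper; it gives the deterministic ordering needed for the sort and dedup calls above.

use std::cmp::Ordering;

// Order f32 slices lexicographically with a total order so they can be
// sorted and deduplicated deterministically.
fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
    a.iter()
        .zip(b.iter())
        .map(|(x, y)| x.total_cmp(y))
        .find(|ordering| ordering.is_ne())
        .unwrap_or_else(|| a.len().cmp(&b.len()))
}

fn main() {
    let mut vectors = vec![vec![0.3, 0.1], vec![0.1, 0.2], vec![0.1, 0.2]];
    vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
    vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
    assert_eq!(vectors, vec![vec![0.1, 0.2], vec![0.3, 0.1]]);
}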
/// Extracts the vectors from a JSON value.
fn extract_vectors(value: &[u8], document_id: impl Fn() -> Value) -> Result<Option<Vec<Vec<f32>>>> {
match from_slice(value) {
Ok(vectors) => Ok(VectorOrArrayOfVectors::into_array_of_vectors(vectors)),
Err(_) => Err(UserError::InvalidVectorsType {
document_id: document_id(),
value: from_slice(value).map_err(InternalError::SerdeJson)?,
}
.into()),
}
}

View File

@ -1,18 +1,20 @@
use std::collections::HashSet;
use std::collections::{BTreeSet, HashSet};
use std::fs::File;
use std::io::{self, BufReader};
use std::iter::FromIterator;
use roaring::RoaringBitmap;
use heed::BytesDecode;
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_roaring_bitmaps, serialize_roaring_bitmap, sorter_into_reader,
try_split_array_at, GrenadParameters,
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader,
try_split_array_at, writer_into_reader, GrenadParameters,
};
use crate::error::SerializationError;
use crate::heed_codec::StrBEU16Codec;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::update::index_documents::helpers::read_u32_ne_bytes;
use crate::{relative_from_absolute_position, FieldId, Result};
use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::MergeFn;
use crate::{DocumentId, FieldId, Result};
/// Extracts the word and the document ids where this word appears.
///
@ -26,65 +28,152 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
exact_attributes: &HashSet<FieldId>,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
) -> Result<(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_docids_sorter = create_sorter(
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|x| x / 2),
max_memory.map(|x| x / 3),
);
let mut key_buffer = Vec::new();
let mut del_words = BTreeSet::new();
let mut add_words = BTreeSet::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let (fid_bytes, _) = try_split_array_at(fid_bytes)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let fid = u16::from_be_bytes(fid_bytes);
let del_add_reader = KvReaderDelAdd::new(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (_pos, word) in KvReaderU16::new(deletion).iter() {
del_words.insert(word.to_vec());
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (_pos, word) in KvReaderU16::new(addition).iter() {
add_words.insert(word.to_vec());
}
}
words_into_sorter(
document_id,
fid,
&mut key_buffer,
&del_words,
&add_words,
&mut word_fid_docids_sorter,
)?;
del_words.clear();
add_words.clear();
}
let mut word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|x| x / 3),
);
let mut exact_word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|x| x / 2),
max_memory.map(|x| x / 3),
);
let mut value_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, positions)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
let mut word_fid_docids_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
// TODO: replace sorters by writers by accumulating values into a buffer before inserting them.
while let Some((key, value)) = iter.next()? {
// only keep the value if there is a change to apply in the DB.
if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) {
word_fid_docids_writer.insert(key, value)?;
}
let (word, fid) = StrBEU16Codec::bytes_decode(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let bitmap = RoaringBitmap::from_iter(Some(document_id));
serialize_roaring_bitmap(&bitmap, &mut value_buffer)?;
// If there are no exact attributes, we do not need to iterate over positions.
if exact_attributes.is_empty() {
word_docids_sorter.insert(word_bytes, &value_buffer)?;
// every word contained in an attribute set to exact must be pushed into the exact_words list.
if exact_attributes.contains(&fid) {
exact_word_docids_sorter.insert(word.as_bytes(), value)?;
} else {
let mut added_to_exact = false;
let mut added_to_word_docids = false;
for position in read_u32_ne_bytes(positions) {
// as soon as we know that this word has been added to both sorters, we don't need to
// iterate over the positions.
if added_to_exact && added_to_word_docids {
break;
}
let (fid, _) = relative_from_absolute_position(position);
if exact_attributes.contains(&fid) && !added_to_exact {
exact_word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_exact = true;
} else if !added_to_word_docids {
word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_word_docids = true;
}
}
word_docids_sorter.insert(word.as_bytes(), value)?;
}
}
Ok((
sorter_into_reader(word_docids_sorter, indexer)?,
sorter_into_reader(exact_word_docids_sorter, indexer)?,
writer_into_reader(word_fid_docids_writer)?,
))
}
fn words_into_sorter(
document_id: DocumentId,
fid: FieldId,
key_buffer: &mut Vec<u8>,
del_words: &BTreeSet<Vec<u8>>,
add_words: &BTreeSet<Vec<u8>>,
word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
puffin::profile_function!();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut buffer = Vec::new();
for eob in merge_join_by(del_words.iter(), add_words.iter(), |d, a| d.cmp(a)) {
buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut buffer);
let word_bytes = match eob {
Left(word_bytes) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
word_bytes
}
Right(word_bytes) => {
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
word_bytes
}
Both(word_bytes, _) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
word_bytes
}
};
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&fid.to_be_bytes());
word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())
}
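Conceptually, `words_into_sorter` puts every word of the document into one of three buckets; the hedged, std-only sketch below shows the same classification with BTreeSet lookups. `Change` is a stand-in enum, since the real DelAdd obkv expresses the "both" case by writing the Deletion and the Addition keys side by side.

use std::collections::BTreeSet;

// A word only in the deleted set gets a Deletion entry, only in the added set
// an Addition entry, and a word in both gets both (the docid stays present).
#[derive(Debug, PartialEq)]
enum Change { Deletion, Addition, Both }

fn classify(del_words: &BTreeSet<&str>, add_words: &BTreeSet<&str>) -> Vec<(String, Change)> {
    let mut out = Vec::new();
    for word in del_words.union(add_words) {
        let change = match (del_words.contains(word), add_words.contains(word)) {
            (true, false) => Change::Deletion,
            (false, true) => Change::Addition,
            _ => Change::Both,
        };
        out.push((word.to_string(), change));
    }
    out
}

fn main() {
    let del = BTreeSet::from(["cat", "dog"]);
    let add = BTreeSet::from(["dog", "fox"]);
    assert_eq!(
        classify(&del, &add),
        vec![
            ("cat".to_string(), Change::Deletion),
            ("dog".to_string(), Change::Both),
            ("fox".to_string(), Change::Addition),
        ]
    );
}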

View File

@ -1,51 +0,0 @@
use std::fs::File;
use std::io::{self, BufReader};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::{relative_from_absolute_position, DocumentId, Result};
/// Extracts the word, field id, and the document ids where this word appears at this field id.
#[logging_timer::time]
pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
docid_word_positions: grenad::Reader<R>,
indexer: GrenadParameters,
) -> Result<grenad::Reader<BufReader<File>>> {
puffin::profile_function!();
let max_memory = indexer.max_memory_by_thread();
let mut word_fid_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut key_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);
for position in read_u32_ne_bytes(value) {
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
let (fid, _) = relative_from_absolute_position(position);
key_buffer.extend_from_slice(&fid.to_be_bytes());
word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
}
}
let word_fid_docids_reader = sorter_into_reader(word_fid_docids_sorter, indexer)?;
Ok(word_fid_docids_reader)
}

View File

@ -1,16 +1,18 @@
use std::cmp::Ordering;
use std::collections::{BinaryHeap, HashMap};
use std::collections::{BTreeMap, VecDeque};
use std::fs::File;
use std::io::BufReader;
use std::{cmp, io, mem, str, vec};
use std::{cmp, io};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters, MergeFn,
create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
writer_into_reader, GrenadParameters, MergeFn,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::proximity::{positions_proximity, MAX_DISTANCE};
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::{DocumentId, Result};
/// Extracts the best proximity between pairs of words and the document ids where this pair appears.
@ -26,58 +28,137 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / 2),
);
let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
.map(|_| {
create_sorter(
grenad::SortAlgorithm::Unstable,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory.map(|m| m / MAX_DISTANCE as usize),
)
})
.collect();
// This map is assumed to not consume a lot of memory.
let mut document_word_positions_heap = BinaryHeap::new();
let mut del_word_positions: VecDeque<(String, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
let mut add_word_positions: VecDeque<(String, u16)> =
VecDeque::with_capacity(MAX_DISTANCE as usize);
let mut del_word_pair_proximity = BTreeMap::new();
let mut add_word_pair_proximity = BTreeMap::new();
let mut current_document_id = None;
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
let (document_id_bytes, _fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = u32::from_be_bytes(document_id_bytes);
let word = str::from_utf8(word_bytes)?;
let curr_document_id = *current_document_id.get_or_insert(document_id);
if curr_document_id != document_id {
let document_word_positions_heap = mem::take(&mut document_word_positions_heap);
// if we change document, we fill the sorter
if current_document_id.map_or(false, |id| id != document_id) {
puffin::profile_scope!("Document into sorter");
document_word_positions_into_sorter(
curr_document_id,
document_word_positions_heap,
&mut word_pair_proximity_docids_sorter,
current_document_id.unwrap(),
&del_word_pair_proximity,
&add_word_pair_proximity,
&mut word_pair_proximity_docids_sorters,
)?;
current_document_id = Some(document_id);
del_word_pair_proximity.clear();
add_word_pair_proximity.clear();
}
let word = word.to_string();
let mut positions: Vec<_> = read_u32_ne_bytes(value).collect();
positions.sort_unstable();
let mut iter = positions.into_iter();
if let Some(position) = iter.next() {
document_word_positions_heap.push(PeekedWordPosition { word, position, iter });
}
current_document_id = Some(document_id);
let (del, add): (Result<_>, Result<_>) = rayon::join(
|| {
// deletions
if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
for (position, word) in KvReaderU16::new(deletion).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while del_word_positions.get(0).map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
}) {
word_positions_into_word_pair_proximity(
&mut del_word_positions,
&mut del_word_pair_proximity,
)?;
}
// insert the new word.
let word = std::str::from_utf8(word)?;
del_word_positions.push_back((word.to_string(), position));
}
while !del_word_positions.is_empty() {
word_positions_into_word_pair_proximity(
&mut del_word_positions,
&mut del_word_pair_proximity,
)?;
}
}
Ok(())
},
|| {
// additions
if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
for (position, word) in KvReaderU16::new(addition).iter() {
// drain the proximity window until the head word is considered close to the word we are inserting.
while add_word_positions.get(0).map_or(false, |(_w, p)| {
index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
}) {
word_positions_into_word_pair_proximity(
&mut add_word_positions,
&mut add_word_pair_proximity,
)?;
}
// insert the new word.
let word = std::str::from_utf8(word)?;
add_word_positions.push_back((word.to_string(), position));
}
while !add_word_positions.is_empty() {
word_positions_into_word_pair_proximity(
&mut add_word_positions,
&mut add_word_pair_proximity,
)?;
}
}
Ok(())
},
);
del?;
add?;
}
if let Some(document_id) = current_document_id {
// We must make sure that we don't lose the current document's word pair
// proximities if we break because we reached the end of the chunk.
let document_word_positions_heap = mem::take(&mut document_word_positions_heap);
puffin::profile_scope!("Final document into sorter");
document_word_positions_into_sorter(
document_id,
document_word_positions_heap,
&mut word_pair_proximity_docids_sorter,
&del_word_pair_proximity,
&add_word_pair_proximity,
&mut word_pair_proximity_docids_sorters,
)?;
}
{
puffin::profile_scope!("sorter_into_reader");
let mut writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
sorter_into_reader(word_pair_proximity_docids_sorter, indexer)
for sorter in word_pair_proximity_docids_sorters {
sorter.write_into_stream_writer(&mut writer)?;
}
writer_into_reader(writer)
}
}
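A note on the sorter-per-proximity split introduced above: because every key written by document_word_positions_into_sorter begins with the proximity byte, streaming the sorters for proximities 1..=7 into a single writer, in that order, already yields globally sorted keys, so no extra merge pass is needed. A std-only sketch of that invariant (proximity_key is a hypothetical helper mirroring the key layout, not the crate's code):

// Proximity byte, then w1, a 0x00 separator, and w2, as in the keys above.
fn proximity_key(prox: u8, w1: &str, w2: &str) -> Vec<u8> {
    let mut key = vec![prox];
    key.extend_from_slice(w1.as_bytes());
    key.push(0);
    key.extend_from_slice(w2.as_bytes());
    key
}

fn main() {
    // One already-sorted "bucket" per proximity, like the Vec of grenad sorters above.
    let buckets: Vec<Vec<Vec<u8>>> = vec![
        vec![proximity_key(1, "quick", "fox")],
        vec![proximity_key(2, "brown", "dog"), proximity_key(2, "quick", "dog")],
    ];
    // Streaming the buckets one after the other into a single writer...
    let concatenated: Vec<Vec<u8>> = buckets.into_iter().flatten().collect();
    // ...produces the same order as a global sort, because the proximity byte leads each key.
    let mut sorted = concatenated.clone();
    sorted.sort();
    assert_eq!(concatenated, sorted);
}

Splitting the work this way also lets the memory budget be divided evenly across the sorters (max_memory / MAX_DISTANCE above).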
/// Fills the list of all pairs of words with the shortest proximity between 1 and 7 inclusive.
@ -86,96 +167,66 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
/// close to each other.
fn document_word_positions_into_sorter(
document_id: DocumentId,
mut word_positions_heap: BinaryHeap<PeekedWordPosition<vec::IntoIter<u32>>>,
word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>,
del_word_pair_proximity: &BTreeMap<(String, String), u8>,
add_word_pair_proximity: &BTreeMap<(String, String), u8>,
word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
) -> Result<()> {
let mut word_pair_proximity = HashMap::new();
let mut ordered_peeked_word_positions = Vec::new();
while !word_positions_heap.is_empty() {
while let Some(peeked_word_position) = word_positions_heap.pop() {
ordered_peeked_word_positions.push(peeked_word_position);
if ordered_peeked_word_positions.len() == 7 {
break;
}
}
if let Some((head, tail)) = ordered_peeked_word_positions.split_first() {
for PeekedWordPosition { word, position, .. } in tail {
let prox = positions_proximity(head.position, *position);
if prox > 0 && prox < MAX_DISTANCE {
word_pair_proximity
.entry((head.word.clone(), word.clone()))
.and_modify(|p| {
*p = cmp::min(*p, prox);
})
.or_insert(prox);
}
}
// Push the tail in the heap.
let tail_iter = ordered_peeked_word_positions.drain(1..);
word_positions_heap.extend(tail_iter);
// Advance the head and push it in the heap.
if let Some(mut head) = ordered_peeked_word_positions.pop() {
if let Some(next_position) = head.iter.next() {
let prox = positions_proximity(head.position, next_position);
if prox > 0 && prox < MAX_DISTANCE {
word_pair_proximity
.entry((head.word.clone(), head.word.clone()))
.and_modify(|p| {
*p = cmp::min(*p, prox);
})
.or_insert(prox);
}
word_positions_heap.push(PeekedWordPosition {
word: head.word,
position: next_position,
iter: head.iter,
});
}
}
}
}
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut buffer = Vec::new();
let mut key_buffer = Vec::new();
for ((w1, w2), prox) in word_pair_proximity {
for eob in
merge_join_by(del_word_pair_proximity.iter(), add_word_pair_proximity.iter(), |d, a| {
d.cmp(a)
})
{
buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut buffer);
let ((w1, w2), prox) = match eob {
Left(key_value) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
key_value
}
Right(key_value) => {
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key_value
}
Both(key_value, _) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key_value
}
};
key_buffer.clear();
key_buffer.push(prox as u8);
key_buffer.push(*prox);
key_buffer.extend_from_slice(w1.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(w2.as_bytes());
word_pair_proximity_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
word_pair_proximity_docids_sorters[*prox as usize - 1]
.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())
}
struct PeekedWordPosition<I> {
word: String,
position: u32,
iter: I,
}
impl<I> Ord for PeekedWordPosition<I> {
fn cmp(&self, other: &Self) -> Ordering {
self.position.cmp(&other.position).reverse()
}
}
impl<I> PartialOrd for PeekedWordPosition<I> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl<I> Eq for PeekedWordPosition<I> {}
impl<I> PartialEq for PeekedWordPosition<I> {
fn eq(&self, other: &Self) -> bool {
self.position == other.position
fn word_positions_into_word_pair_proximity(
word_positions: &mut VecDeque<(String, u16)>,
word_pair_proximity: &mut BTreeMap<(String, String), u8>,
) -> Result<()> {
let (head_word, head_position) = word_positions.pop_front().unwrap();
for (word, position) in word_positions.iter() {
let prox = index_proximity(head_position as u32, *position as u32) as u8;
if prox > 0 && prox < MAX_DISTANCE as u8 {
word_pair_proximity
.entry((head_word.clone(), word.clone()))
.and_modify(|p| {
*p = cmp::min(*p, prox);
})
.or_insert(prox);
}
}
Ok(())
}
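The del and add branches above share one pattern: push (word, position) tokens into a VecDeque window, pop the head once the incoming position is too far away, and record the shortest proximity for every ordered pair the head formed while in the window. A self-contained sketch of that pattern, assuming MAX_DISTANCE = 8 (consistent with the "between 1 and 7 inclusive" doc comment) and using a simplified stand-in for the crate's index_proximity:

use std::collections::{BTreeMap, VecDeque};

const MAX_DISTANCE: u32 = 8; // assumed from the 1..=7 proximity range documented above

// Simplified stand-in for index_proximity: positional distance, with reversed
// order penalised by one, capped at MAX_DISTANCE.
fn proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs {
        (rhs - lhs).min(MAX_DISTANCE)
    } else {
        (lhs - rhs + 1).min(MAX_DISTANCE)
    }
}

// Pops the window head and records its shortest proximity to every word still in the window.
fn drain_head(
    window: &mut VecDeque<(String, u16)>,
    pairs: &mut BTreeMap<(String, String), u8>,
) {
    let (head_word, head_pos) = window.pop_front().unwrap();
    for (word, pos) in window.iter() {
        let prox = proximity(head_pos as u32, *pos as u32) as u8;
        if prox > 0 && (prox as u32) < MAX_DISTANCE {
            pairs
                .entry((head_word.clone(), word.clone()))
                .and_modify(|p| *p = (*p).min(prox))
                .or_insert(prox);
        }
    }
}

// Streams one document's (word, position) tokens through the sliding window.
fn shortest_proximities(tokens: &[(&str, u16)]) -> BTreeMap<(String, String), u8> {
    let mut window: VecDeque<(String, u16)> = VecDeque::new();
    let mut pairs = BTreeMap::new();
    for &(word, position) in tokens {
        // Drain heads that are no longer close enough to the incoming word.
        while window
            .front()
            .map_or(false, |(_, p)| proximity(*p as u32, position as u32) >= MAX_DISTANCE)
        {
            drain_head(&mut window, &mut pairs);
        }
        window.push_back((word.to_string(), position));
    }
    // Flush whatever is left once the document is fully read.
    while !window.is_empty() {
        drain_head(&mut window, &mut pairs);
    }
    pairs
}

fn main() {
    let pairs = shortest_proximities(&[("the", 0), ("quick", 1), ("fox", 3)]);
    assert_eq!(pairs[&("the".to_string(), "quick".to_string())], 1);
    assert_eq!(pairs[&("quick".to_string(), "fox".to_string())], 2);
}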

@ -1,13 +1,18 @@
use std::collections::BTreeSet;
use std::fs::File;
use std::io::{self, BufReader};
use obkv::KvReaderU16;
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
try_split_array_at, GrenadParameters,
create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
GrenadParameters,
};
use crate::error::SerializationError;
use crate::index::db_name::DOCID_WORD_POSITIONS;
use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Result};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::MergeFn;
use crate::{bucketed_position, DocumentId, Result};
/// Extracts the word positions and the documents ids where this word appears.
///
@ -24,32 +29,111 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
indexer.max_nb_chunks,
max_memory,
);
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
let mut add_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
let mut current_document_id: Option<u32> = None;
let mut key_buffer = Vec::new();
let mut cursor = docid_word_positions.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let (document_id_bytes, word_bytes) = try_split_array_at(key)
let (document_id_bytes, _fid_bytes) = try_split_array_at(key)
.ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
let document_id = DocumentId::from_be_bytes(document_id_bytes);
for position in read_u32_ne_bytes(value) {
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
let (_, position) = relative_from_absolute_position(position);
let position = bucketed_position(position);
key_buffer.extend_from_slice(&position.to_be_bytes());
word_position_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
if current_document_id.map_or(false, |id| document_id != id) {
words_position_into_sorter(
current_document_id.unwrap(),
&mut key_buffer,
&del_word_positions,
&add_word_positions,
&mut word_position_docids_sorter,
)?;
del_word_positions.clear();
add_word_positions.clear();
}
current_document_id = Some(document_id);
let del_add_reader = KvReaderDelAdd::new(value);
// extract all unique words to remove.
if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
let position = bucketed_position(position);
del_word_positions.insert((position, word_bytes.to_vec()));
}
}
// extract all unique additional words.
if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
for (position, word_bytes) in KvReaderU16::new(addition).iter() {
let position = bucketed_position(position);
add_word_positions.insert((position, word_bytes.to_vec()));
}
}
}
if let Some(document_id) = current_document_id {
words_position_into_sorter(
document_id,
&mut key_buffer,
&del_word_positions,
&add_word_positions,
&mut word_position_docids_sorter,
)?;
}
// TODO remove noop DelAdd OBKV
let word_position_docids_reader = sorter_into_reader(word_position_docids_sorter, indexer)?;
Ok(word_position_docids_reader)
}
fn words_position_into_sorter(
document_id: DocumentId,
key_buffer: &mut Vec<u8>,
del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
) -> Result<()> {
puffin::profile_function!();
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
let mut buffer = Vec::new();
for eob in merge_join_by(del_word_positions.iter(), add_word_positions.iter(), |d, a| d.cmp(a))
{
buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut buffer);
let (position, word_bytes) = match eob {
Left(key) => {
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
key
}
Right(key) => {
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key
}
Both(key, _) => {
// Both values need to be kept because they will be used by other extractors.
value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
key
}
};
key_buffer.clear();
key_buffer.extend_from_slice(word_bytes);
key_buffer.push(0);
key_buffer.extend_from_slice(&position.to_be_bytes());
word_position_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
}
Ok(())
}
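Both words_position_into_sorter above and document_word_positions_into_sorter earlier rely on the same idea: walk the sorted deletion and addition collections in lockstep (merge_join_by) and write the document id under the Deletion key, the Addition key, or both. A std-only sketch of that classification step; the Side enum and classify function are illustrative only, and the obkv encoding through KvWriterDelAdd is left out:

use std::collections::BTreeSet;

// Illustrative only: the crate's DelAdd has just Deletion and Addition, and
// "both" is expressed by writing the document id under both obkv keys.
#[derive(Debug, PartialEq)]
enum Side {
    DeletionOnly,
    AdditionOnly,
    Both,
}

// Walks the sorted `del` and `add` sets in lockstep, the way merge_join_by is
// used above, and tags every entry with the side(s) it must be written to.
fn classify<T: Ord + Clone>(del: &BTreeSet<T>, add: &BTreeSet<T>) -> Vec<(T, Side)> {
    let mut out = Vec::new();
    let mut del_iter = del.iter();
    let mut add_iter = add.iter();
    let (mut d, mut a) = (del_iter.next(), add_iter.next());
    loop {
        match (d, a) {
            (Some(x), Some(y)) if x == y => {
                out.push((x.clone(), Side::Both));
                d = del_iter.next();
                a = add_iter.next();
            }
            (Some(x), Some(y)) if x < y => {
                out.push((x.clone(), Side::DeletionOnly));
                d = del_iter.next();
            }
            (Some(_), Some(y)) => {
                out.push((y.clone(), Side::AdditionOnly));
                a = add_iter.next();
            }
            (Some(x), None) => {
                out.push((x.clone(), Side::DeletionOnly));
                d = del_iter.next();
            }
            (None, Some(y)) => {
                out.push((y.clone(), Side::AdditionOnly));
                a = add_iter.next();
            }
            (None, None) => break,
        }
    }
    out
}

fn main() {
    let del: BTreeSet<&str> = ["cat", "dog"].into_iter().collect();
    let add: BTreeSet<&str> = ["dog", "fox"].into_iter().collect();
    assert_eq!(
        classify(&del, &add),
        vec![
            ("cat", Side::DeletionOnly), // only in the old version of the document
            ("dog", Side::Both),         // kept: still needed by the other extractors
            ("fox", Side::AdditionOnly), // only in the new version
        ]
    );
}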

@ -6,7 +6,6 @@ mod extract_fid_word_count_docids;
mod extract_geo_points;
mod extract_vector_points;
mod extract_word_docids;
mod extract_word_fid_docids;
mod extract_word_pair_proximity_docids;
mod extract_word_position_docids;
@ -26,12 +25,11 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
use self::extract_geo_points::extract_geo_points;
use self::extract_vector_points::extract_vector_points;
use self::extract_word_docids::extract_word_docids;
use self::extract_word_fid_docids::extract_word_fid_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
GrenadParameters, MergeFn, MergeableReader,
as_cloneable_grenad, merge_deladd_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters,
MergeFn, MergeableReader,
};
use super::{helpers, TypedChunk};
use crate::{FieldId, Result};
@ -65,7 +63,6 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
vectors_field_id,
primary_key_id,
)
})
.collect::<Result<()>>()?;
@ -94,9 +91,9 @@ pub(crate) fn data_from_obkv_documents(
let (
docid_word_positions_chunks,
(
docid_fid_facet_numbers_chunks,
fid_docid_facet_numbers_chunks,
(
docid_fid_facet_strings_chunks,
fid_docid_facet_strings_chunks,
(
facet_is_null_docids_chunks,
(facet_is_empty_docids_chunks, facet_exists_docids_chunks),
@ -110,7 +107,7 @@ pub(crate) fn data_from_obkv_documents(
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-exists-docids");
match facet_exists_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
}
@ -126,7 +123,7 @@ pub(crate) fn data_from_obkv_documents(
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-null-docids");
match facet_is_null_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
}
@ -142,7 +139,7 @@ pub(crate) fn data_from_obkv_documents(
let lmdb_writer_sx = lmdb_writer_sx.clone();
rayon::spawn(move || {
debug!("merge {} database", "facet-id-is-empty-docids");
match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
Ok(reader) => {
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
}
@ -158,7 +155,7 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
extract_word_pair_proximity_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::WordPairProximityDocids,
"word-pair-proximity-docids",
);
@ -168,24 +165,31 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
extract_fid_word_count_docids,
merge_cbo_roaring_bitmaps,
TypedChunk::FieldIdWordcountDocids,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::FieldIdWordCountDocids,
"field-id-wordcount-docids",
);
spawn_extraction_task::<
_,
_,
Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
Vec<(
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
grenad::Reader<BufReader<File>>,
)>,
>(
docid_word_positions_chunks.clone(),
indexer,
lmdb_writer_sx.clone(),
move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
merge_roaring_bitmaps,
|(word_docids_reader, exact_word_docids_reader)| TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
merge_deladd_cbo_roaring_bitmaps,
|(word_docids_reader, exact_word_docids_reader, word_fid_docids_reader)| {
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
}
},
"word-docids",
);
@ -195,36 +199,27 @@ pub(crate) fn data_from_obkv_documents(
indexer,
lmdb_writer_sx.clone(),
extract_word_position_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::WordPositionDocids,
"word-position-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_word_positions_chunks,
indexer,
lmdb_writer_sx.clone(),
extract_word_fid_docids,
merge_cbo_roaring_bitmaps,
TypedChunk::WordFidDocids,
"word-fid-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_strings_chunks,
fid_docid_facet_strings_chunks,
indexer,
lmdb_writer_sx.clone(),
extract_facet_string_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::FieldIdFacetStringDocids,
"field-id-facet-string-docids",
);
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_numbers_chunks,
fid_docid_facet_numbers_chunks,
indexer,
lmdb_writer_sx,
extract_facet_number_docids,
merge_cbo_roaring_bitmaps,
merge_deladd_cbo_roaring_bitmaps,
TypedChunk::FieldIdFacetNumberDocids,
"field-id-facet-number-docids",
);
@ -278,7 +273,6 @@ fn send_original_documents_data(
indexer: GrenadParameters,
lmdb_writer_sx: Sender<Result<TypedChunk>>,
vectors_field_id: Option<FieldId>,
primary_key_id: FieldId,
) -> Result<()> {
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
@ -287,12 +281,7 @@ fn send_original_documents_data(
let documents_chunk_cloned = original_documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let result = extract_vector_points(
documents_chunk_cloned,
indexer,
primary_key_id,
vectors_field_id,
);
let result = extract_vector_points(documents_chunk_cloned, indexer, vectors_field_id);
let _ = match result {
Ok(vector_points) => {
lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
@ -356,10 +345,10 @@ fn send_and_extract_flattened_documents_data(
});
}
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(
|| {
let (documents_ids, docid_word_positions_chunk, script_language_pair) =
let (docid_word_positions_chunk, script_language_pair) =
extract_docid_word_positions(
flattened_documents_chunk.clone(),
indexer,
@ -370,9 +359,6 @@ fn send_and_extract_flattened_documents_data(
max_positions_per_attributes,
)?;
// send documents_ids to DB writer
let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids)));
// send docid_word_positions_chunk to DB writer
let docid_word_positions_chunk =
unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
@ -384,8 +370,8 @@ fn send_and_extract_flattened_documents_data(
},
|| {
let ExtractedFacetValues {
docid_fid_facet_numbers_chunk,
docid_fid_facet_strings_chunk,
fid_docid_facet_numbers_chunk,
fid_docid_facet_strings_chunk,
fid_facet_is_null_docids_chunk,
fid_facet_is_empty_docids_chunk,
fid_facet_exists_docids_chunk,
@ -396,26 +382,26 @@ fn send_and_extract_flattened_documents_data(
geo_fields_ids,
)?;
// send docid_fid_facet_numbers_chunk to DB writer
let docid_fid_facet_numbers_chunk =
unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? };
// send fid_docid_facet_numbers_chunk to DB writer
let fid_docid_facet_numbers_chunk =
unsafe { as_cloneable_grenad(&fid_docid_facet_numbers_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
docid_fid_facet_numbers_chunk.clone(),
fid_docid_facet_numbers_chunk.clone(),
)));
// send docid_fid_facet_strings_chunk to DB writer
let docid_fid_facet_strings_chunk =
unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? };
// send fid_docid_facet_strings_chunk to DB writer
let fid_docid_facet_strings_chunk =
unsafe { as_cloneable_grenad(&fid_docid_facet_strings_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
docid_fid_facet_strings_chunk.clone(),
fid_docid_facet_strings_chunk.clone(),
)));
Ok((
docid_fid_facet_numbers_chunk,
fid_docid_facet_numbers_chunk,
(
docid_fid_facet_strings_chunk,
fid_docid_facet_strings_chunk,
(
fid_facet_is_null_docids_chunk,
(fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
@ -425,5 +411,5 @@ fn send_and_extract_flattened_documents_data(
},
);
Ok((docid_word_positions_chunk?, docid_fid_facet_values_chunks?))
Ok((docid_word_positions_chunk?, fid_docid_facet_values_chunks?))
}

Some files were not shown because too many files have changed in this diff.