Compare commits

...

61 Commits

Author SHA1 Message Date
a985b4bc29 Accept the max readers param by env var and increase it 2025-01-28 14:48:01 +01:00
ba11121cfc Merge #5159
5159: Fix the New Indexer Spilling r=irevoire a=Kerollmops

Fix two bugs in the merging of the spilled caches. Thanks to `@ManyTheFish` and `@irevoire` 👏
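As a purely illustrative sketch of the bug class fixed here (none of these names come from the milli codebase): when merging per-thread spilled caches, entries must be selected by the thread that *produced* them (the source id), not by the thread doing the merging (the destination id).

```rust
use std::collections::HashMap;

/// One spilled cache entry, tagged with the id of the thread that produced it.
struct SpilledEntry {
    source_thread: usize,
    key: String,
    value: u64,
}

/// Merge only the entries produced by `source_thread` into `acc`.
/// Filtering on the destination id instead of the source id (the bug class
/// fixed by this PR) silently drops or mixes entries.
fn merge_for_thread(
    acc: &mut HashMap<String, u64>,
    spilled: &[SpilledEntry],
    source_thread: usize,
) {
    for entry in spilled.iter().filter(|e| e.source_thread == source_thread) {
        *acc.entry(entry.key.clone()).or_insert(0) += entry.value;
    }
}

fn main() {
    let spilled = vec![
        SpilledEntry { source_thread: 0, key: "word".into(), value: 2 },
        SpilledEntry { source_thread: 1, key: "word".into(), value: 3 },
    ];
    let mut acc = HashMap::new();
    merge_for_thread(&mut acc, &spilled, 0);
    assert_eq!(acc["word"], 2); // only thread 0's contribution is merged here
}
```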

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-12 17:16:53 +00:00
acdd5aa6ea Use the thread source id instead of the destination id when filtering on the cache to merge 2024-12-12 18:12:00 +01:00
2f3cc8cdd2 Fix the merge_caches_sorted function 2024-12-12 16:15:37 +01:00
7a95fed23f Merge #5158
5158: Indexer edition 2024 fix facet fst r=Kerollmops a=ManyTheFish

# Pull Request
Fix a regression in the new indexer: when several filterable attributes containing strings were set, all the field IDs were shifted and the last one overwrote the previous FST.

## What does this PR do?
- Add a test reproducing the bug
- Fix the bug
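
As a hypothetical illustration of the regression described above (the types and function here are invented for the example, not taken from milli): keying each facet FST by a loop index instead of the real field id shifts every id after the first, so the last FST overwrites an earlier one.

```rust
use std::collections::BTreeMap;

/// Stand-in for the per-field facet FST store: field id -> sorted facet strings.
type FacetStore = BTreeMap<u16, Vec<String>>;

/// Correct version: key each facet set by its real field id, not by the
/// enumeration index, so no field's entry overwrites another's.
fn write_facet_strings(store: &mut FacetStore, fields: &[(u16, Vec<String>)]) {
    for (field_id, mut strings) in fields.iter().cloned() {
        strings.sort();
        store.insert(field_id, strings);
    }
}

fn main() {
    let mut store = FacetStore::new();
    // Two filterable string attributes with non-contiguous field ids.
    let fields = vec![
        (3u16, vec!["red".to_string(), "blue".to_string()]),
        (7u16, vec!["small".to_string(), "large".to_string()]),
    ];
    write_facet_strings(&mut store, &fields);
    assert_eq!(store.len(), 2); // both fields keep their own entry
}
```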

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-12 14:14:44 +00:00
961de4d34e Fix facet fst 2024-12-12 15:12:28 +01:00
18ce95dcbf Add test reproducing the bug 2024-12-12 14:56:45 +01:00
c177210b1b Merge #5152
5152: Make xtasks be able to use the specified binary r=dureuill a=Kerollmops

Makes it possible to specify the binary to run. This is useful for running PGO-optimized binaries.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
2024-12-12 12:28:16 +00:00
1fc90fbacb Merge #5147
5147: Batch progress r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5068

## What does this PR do?
- ...

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-12-12 09:15:54 +00:00
6c72559457 Update the binary-path description
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-12 09:39:39 +01:00
1fdfa3f208 Change the exit code to 130 when Ctrl-C'ed 2024-12-12 09:26:14 +01:00
0d0c18f519 rename the Step::name into Step::current_step 2024-12-11 18:41:03 +01:00
d12364c1e0 fix the tests 2024-12-11 18:30:48 +01:00
8cd3a1aa57 fmt 2024-12-11 18:18:40 +01:00
08fd026ebd fix warning 2024-12-11 18:18:13 +01:00
75d5cea624 use a with_capacity while allocating the progress view 2024-12-11 18:17:33 +01:00
ab9213fa94 ensure we never write the progress to the db 2024-12-11 18:16:20 +01:00
45d5d4bf40 make the progressview public 2024-12-11 18:15:33 +01:00
fa885e75b4 rename the send_progress in progress 2024-12-11 18:13:12 +01:00
29fc77ee5b remove useless print 2024-12-11 18:11:19 +01:00
ad4dc70720 rename the ComputingTheChanges to ComputingDocumentChanges in the edit document progress 2024-12-11 18:09:54 +01:00
5d682b4700 rename the ComputingTheChanges to ComputingDocumentChanges 2024-12-11 18:08:45 +01:00
f1beb60204 make the progress use payload instead of documents 2024-12-11 18:07:45 +01:00
85577e70cd reuse the enqueued 2024-12-11 18:05:34 +01:00
c5536c37b5 rename the atomic::name to unit_name 2024-12-11 18:03:06 +01:00
9245c89cfe move the macros to milli 2024-12-11 18:00:46 +01:00
eaabc1af2f Merge #5144
5144: Exactly 512 bytes docid fails r=Kerollmops a=dureuill

# Pull Request

## Related issue
Fixes #5050 

## What does this PR do?
- Return a user error rather than an internal one for docids of exactly 512 bytes (see the sketch below)
- Fix up the error message to indicate that docids exactly 512 bytes long are not supported
- Fix up the error message to reflect that index uids are actually limited to 400 bytes in length
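
A rough sketch of the validation this implies (the constant, error type, and function are illustrative, not the actual milli definitions): primary-key values of 512 bytes or more are rejected with a user-facing error instead of an internal one.

```rust
/// Illustrative limit: valid docids are at most 511 bytes long.
const MAX_DOCID_BYTES: usize = 511;

#[derive(Debug)]
enum DocidError {
    /// User error: the document id is 512 bytes or longer.
    TooLong { docid: String, length: usize },
}

fn validate_docid(docid: &str) -> Result<(), DocidError> {
    let length = docid.len();
    if length > MAX_DOCID_BYTES {
        // Surfaced as a *user* error so a 512-byte docid produces a clear
        // message rather than an internal error.
        return Err(DocidError::TooLong { docid: docid.to_string(), length });
    }
    Ok(())
}

fn main() {
    assert!(validate_docid(&"a".repeat(511)).is_ok());
    println!("{:?}", validate_docid(&"a".repeat(512))); // Err(TooLong { .. })
}
```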

## Impact

- Impacts docs: 
    - update [this paragraph](https://www.meilisearch.com/docs/learn/resources/known_limitations#length-of-primary-key-values) to say 511 bytes instead of 512 

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 15:41:05 +00:00
04a24a9239 Kill Meilisearch with a TERM signal 2024-12-11 16:27:07 +01:00
1f54dfa883 update the macro to look more like an enum 2024-12-11 16:26:09 +01:00
786b0fabea implement the progress for almost all the tasks 2024-12-11 16:26:08 +01:00
26733c705d add progress for the task deletion and task cancelation 2024-12-11 16:25:02 +01:00
ab75f53efd update all snapshots 2024-12-11 16:25:02 +01:00
867e6a8f1d rename the send_progress field to progress since it's not sending anything 2024-12-11 16:25:01 +01:00
6f4823fc97 make the number of documents in the document tasks more incremental 2024-12-11 16:25:01 +01:00
df9b68f8ed initial implementation of the progress 2024-12-11 16:25:01 +01:00
5bc6391700 Merge #5153
5153: Return docid in case of errors while rendering the document template r=Kerollmops a=dureuill

Improves the error message:

Before: 

```
ERROR index_scheduler: Batch failed Index `mieli`: user error: missing field in document: liquid: Unknown index
  with:
    variable=doc
    requested index=title
    available indexes=by, id, kids, parent, text, time, type
```

After:

```
ERROR index_scheduler: Batch failed Index `mieli`: user error: missing field in document `11345147`: liquid: Unknown index
  with:
    variable=doc
    requested index=title
    available indexes=by, id, kids, parent, text, time, type
```
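
A minimal sketch of how a document id can be threaded into a rendering error (the error type and renderer below are invented for illustration; the real change lives in milli's template rendering):

```rust
use std::fmt;

/// Illustrative error that carries the docid alongside the underlying
/// rendering failure, so the log line can show which document failed.
#[derive(Debug)]
struct RenderError {
    docid: String,
    source: String,
}

impl fmt::Display for RenderError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "missing field in document `{}`: {}", self.docid, self.source)
    }
}

/// Pretend renderer: fails when the requested field is absent from the document.
fn render_template(docid: &str, fields: &[&str], requested: &str) -> Result<String, RenderError> {
    if fields.contains(&requested) {
        Ok(format!("{{{{doc.{requested}}}}}"))
    } else {
        Err(RenderError {
            docid: docid.to_string(),
            source: format!("liquid: Unknown index `{requested}`"),
        })
    }
}

fn main() {
    let err = render_template("11345147", &["by", "id", "text"], "title").unwrap_err();
    // Matches the shape of the "after" log above: the docid is part of the message.
    println!("{err}");
}
```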

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 15:01:40 +00:00
eaa897d983 Avoid compiling when unnecessary 2024-12-11 15:57:16 +01:00
bfca54cc2c Return docid in case of errors while rendering the document template 2024-12-11 15:26:18 +01:00
04a62d2b97 Compile Meilisearch or run the dedicated binary file 2024-12-11 14:57:07 +01:00
8c19cb0a0b Merge #5146
5146: Offline upgrade v1.12 r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #4978 

## What does this PR do?
- Add a v1_11_to_v1_12 function to upgrade Meilisearch from v1.11 to v1.12
- Convert the update files from the OBKV format to ndjson (see the sketch below)
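
A rough sketch of the NDJSON-writing half of that conversion, under the assumption that each document from the old OBKV update file can be read back as a JSON object (the reader side is elided and the documents are built inline; this is not the actual meilitool upgrade code):

```rust
use std::io::{BufWriter, Write};

use serde_json::{json, Map, Value};

/// Write each document as one JSON object per line (NDJSON).
fn write_ndjson<W: Write>(out: W, documents: &[Map<String, Value>]) -> std::io::Result<()> {
    let mut out = BufWriter::new(out);
    for document in documents {
        serde_json::to_writer(&mut out, document)?;
        out.write_all(b"\n")?;
    }
    out.flush()
}

fn main() -> std::io::Result<()> {
    // In the real upgrade these documents come from the old OBKV update files;
    // here they are hard-coded for illustration only.
    let documents = vec![
        json!({ "id": 1, "title": "first" }).as_object().unwrap().clone(),
        json!({ "id": 2, "title": "second" }).as_object().unwrap().clone(),
    ];
    let mut buffer = Vec::new();
    write_ndjson(&mut buffer, &documents)?;
    assert_eq!(buffer.iter().filter(|&&b| b == b'\n').count(), 2);
    Ok(())
}
```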


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
2024-12-11 13:39:14 +00:00
5c492031d9 Update crates/meilitool/src/upgrade/v1_12.rs
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2024-12-11 14:34:18 +01:00
fb1caa4724 Merge #5148
5148: Do not duplicate NDJson data when unnecessary r=dureuill a=Kerollmops

This PR improves the NDJSON support. Usually, we save all of the user's document content into a temporary file, validate its content, and then convert everything into NDJSON in the file store (the update files of the tasks).

This is a waste of time when users are already sending NDJSON, so this PR removes the last copy and stores the user content directly in the file store, validating it from there. If an issue arises, the file does not persist and is dropped/deleted instead.

Related to #5078.
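
A simplified sketch of the idea (the file-store plumbing and tokio blocking pass are omitted; the function below is illustrative): the payload is written once and validated in place as NDJSON, so no second copy is produced, and a failed validation simply means the file is dropped.

```rust
use std::io::{BufRead, BufReader, Read};

use serde_json::Value;

/// Validate that `payload` is NDJSON: one JSON object per non-empty line.
/// Returns the number of documents on success.
fn validate_ndjson<R: Read>(payload: R) -> Result<usize, String> {
    let mut count = 0;
    for (i, line) in BufReader::new(payload).lines().enumerate() {
        let line = line.map_err(|e| e.to_string())?;
        if line.trim().is_empty() {
            continue;
        }
        match serde_json::from_str::<Value>(&line) {
            Ok(Value::Object(_)) => count += 1,
            Ok(_) => return Err(format!("line {}: expected a JSON object", i + 1)),
            Err(e) => return Err(format!("line {}: {e}", i + 1)),
        }
    }
    Ok(count)
}

fn main() {
    // Validation happens on the bytes already sitting in the store: no extra NDJSON copy.
    let good: &[u8] = b"{\"id\":1}\n{\"id\":2}\n";
    assert_eq!(validate_ndjson(good), Ok(2));

    let bad: &[u8] = b"{\"id\":1}\nnot json\n";
    assert!(validate_ndjson(bad).is_err()); // the file would be dropped, not persisted
}
```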

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-11 13:00:50 +00:00
5622b9607d Wrap the read NDJSON pass into a tokio blocking 2024-12-11 12:18:36 +01:00
01bcc601be Use a nonrandom hasher when decoding JSON 2024-12-11 12:04:29 +01:00
93fbdc06d3 Use a nonrandom hasher when decoding NDJSON 2024-12-11 12:03:09 +01:00
69c931334f Fix the error messages categorization with invalid NDJson 2024-12-11 12:02:48 +01:00
d683f5980c Do not duplicate NDJson when unnecessary 2024-12-11 12:02:48 +01:00
f8ba112f66 Merge #5150
5150: Reintroduce the Document Addition Logs r=dureuill a=Kerollmops

This PR reintroduces tracing logs that had been lost, showing information about the number of indexed documents.

Related to #5078. Resolves [this comment](https://github.com/meilisearch/meilisearch/pull/4900/files?show-deleted-files=true&show-viewed-files=true&file-filters%5B%5D=#r1852158338) and [this other one](https://github.com/meilisearch/meilisearch/pull/4900/files?show-deleted-files=true&show-viewed-files=true&file-filters%5B%5D=#r1852159073).
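
A rough sketch of the kind of log being reintroduced, using the `tracing` crate (the struct mirrors the `DocumentAdditionResult` visible in the diff below, but the surrounding code and the `tracing-subscriber` setup are illustrative assumptions):

```rust
use std::time::Instant;

#[derive(Debug)]
struct DocumentAdditionResult {
    indexed_documents: u64,
    number_of_documents: u64,
}

fn main() {
    // Assumes the `tracing` and `tracing-subscriber` crates are available.
    tracing_subscriber::fmt::init();

    let started_processing_at = Instant::now();
    // ... indexing happens here ...
    let addition = DocumentAdditionResult { indexed_documents: 1_000, number_of_documents: 250_000 };

    // The reintroduced log: how many documents were indexed and how long it took.
    tracing::info!(
        indexing_result = ?addition,
        processed_in = ?started_processing_at.elapsed(),
        "document indexing done"
    );
}
```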

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-11 10:48:48 +00:00
c614d0dd35 Add context when returning an error 2024-12-11 10:55:39 +01:00
479607e5dd Convert update files from OBKV to ndjson 2024-12-11 10:55:39 +01:00
bb00e70087 Reintroduce the document addition logs 2024-12-11 10:39:04 +01:00
e974be9518 Merge #5145
5145: Use bumparaw-collections in Meilisearch/milli r=dureuill a=Kerollmops

This PR is related to #5078. It uses the now-published bumparaw-collections crate and (soon) makes the `RawMap` hasher nonrandom.
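
A minimal illustration of switching from the standard library's randomly seeded hasher to a fixed, nonrandom one with `rustc-hash` (shown on a plain `HashMap`; applying the same idea to bumparaw-collections' `RawMap` is the PR's actual goal):

```rust
use std::collections::HashMap;

// Assumes the `rustc-hash` 2.x crate; `FxBuildHasher` is its nonrandom BuildHasher.
use rustc_hash::FxBuildHasher;

fn main() {
    // Default HashMap: randomly seeded SipHash, different layout on every run.
    let mut random: HashMap<&str, u64> = HashMap::new();
    random.insert("doc-1", 42);

    // FxBuildHasher: deterministic hashing, useful when decoding large JSON
    // payloads where the DoS resistance of a random seed is not needed.
    let mut stable: HashMap<&str, u64, FxBuildHasher> = HashMap::with_hasher(FxBuildHasher);
    stable.insert("doc-1", 42);

    assert_eq!(random.get("doc-1"), stable.get("doc-1"));
}
```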

Co-authored-by: Kerollmops <clement@meilisearch.com>
2024-12-10 15:51:01 +00:00
aeb6b74725 Make sure we use an FxHashBuilder on the Value 2024-12-10 15:52:22 +01:00
a751972c57 Prefer using a stable than a random hash builder 2024-12-10 14:25:53 +01:00
6b269795d2 Update bumparaw-collections to 0.1.2 2024-12-10 14:25:13 +01:00
d075be798a Fix tests 2024-12-10 13:39:07 +01:00
89637bcaaf Use bumparaw-collections in Meilisearch/milli 2024-12-10 11:52:20 +01:00
866ac91be3 Fix error messages 2024-12-10 11:06:58 +01:00
e610af36aa User failure for documents with docid of ==512 bytes 2024-12-10 11:06:24 +01:00
7cf6707ed3 Extend test to add the ==512 bytes case 2024-12-10 11:05:42 +01:00
1995040846 Merge #5142
5142: Try merge optimisation r=dureuill a=ManyTheFish

![Capture_decran_2024-12-09_a_11 59 42](https://github.com/user-attachments/assets/0dfc7e30-a603-4546-98d2-791990bdfcce)

Co-authored-by: ManyTheFish <many@meilisearch.com>
2024-12-09 14:48:26 +00:00
70 changed files with 1669 additions and 722 deletions

Cargo.lock generated
View File

@ -706,6 +706,20 @@ dependencies = [
"serde",
]
[[package]]
name = "bumparaw-collections"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ce682bdc86c2e25ef5cd95881d9d6a1902214eddf74cf9ffea88fe1464377e8"
dependencies = [
"allocator-api2",
"bitpacking",
"bumpalo",
"hashbrown 0.15.1",
"serde",
"serde_json",
]
[[package]]
name = "byte-unit"
version = "5.1.4"
@ -2617,6 +2631,8 @@ dependencies = [
"big_s",
"bincode",
"bumpalo",
"bumparaw-collections",
"convert_case 0.6.0",
"crossbeam-channel",
"csv",
"derive_builder 0.20.0",
@ -2631,7 +2647,6 @@ dependencies = [
"meilisearch-types",
"memmap2",
"page_size",
"raw-collections",
"rayon",
"roaring",
"serde",
@ -2647,12 +2662,12 @@ dependencies = [
[[package]]
name = "indexmap"
version = "2.2.6"
version = "2.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f"
dependencies = [
"equivalent",
"hashbrown 0.14.3",
"hashbrown 0.15.1",
"serde",
]
@ -3549,6 +3564,7 @@ dependencies = [
"actix-web",
"anyhow",
"bumpalo",
"bumparaw-collections",
"convert_case 0.6.0",
"csv",
"deserr",
@ -3561,8 +3577,8 @@ dependencies = [
"meili-snap",
"memmap2",
"milli",
"raw-collections",
"roaring",
"rustc-hash 2.1.0",
"serde",
"serde-cs",
"serde_json",
@ -3583,9 +3599,12 @@ dependencies = [
"clap",
"dump",
"file-store",
"indexmap",
"meilisearch-auth",
"meilisearch-types",
"serde",
"serde_json",
"tempfile",
"time",
"uuid",
]
@ -3618,6 +3637,7 @@ dependencies = [
"bincode",
"bstr",
"bumpalo",
"bumparaw-collections",
"bytemuck",
"byteorder",
"candle-core",
@ -3656,13 +3676,12 @@ dependencies = [
"once_cell",
"ordered-float",
"rand",
"raw-collections",
"rayon",
"rayon-par-bridge",
"rhai",
"roaring",
"rstar",
"rustc-hash 2.0.0",
"rustc-hash 2.1.0",
"serde",
"serde_json",
"slice-group-by",
@ -4411,7 +4430,7 @@ dependencies = [
"bytes",
"rand",
"ring",
"rustc-hash 2.0.0",
"rustc-hash 2.1.0",
"rustls",
"slab",
"thiserror",
@ -4487,19 +4506,6 @@ dependencies = [
"rand",
]
[[package]]
name = "raw-collections"
version = "0.1.0"
source = "git+https://github.com/meilisearch/raw-collections.git#15e5d7bdebc0c149b2a28b2454f307c717d07f8a"
dependencies = [
"allocator-api2",
"bitpacking",
"bumpalo",
"hashbrown 0.15.1",
"serde",
"serde_json",
]
[[package]]
name = "raw-cpuid"
version = "10.7.0"
@ -4797,9 +4803,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc-hash"
version = "2.0.0"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497"
[[package]]
name = "rustc_version"
@ -4968,9 +4974,9 @@ dependencies = [
[[package]]
name = "serde_json"
version = "1.0.132"
version = "1.0.133"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d726bfaff4b320266d395898905d0eba0345aae23b54aee3a737e260fd46db03"
checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377"
dependencies = [
"indexmap",
"itoa",

View File

@ -8,6 +8,7 @@ use bumpalo::Bump;
use criterion::{criterion_group, criterion_main, Criterion};
use milli::documents::PrimaryKey;
use milli::heed::{EnvOpenOptions, RwTxn};
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -578,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1400,7 +1401,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1415,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1445,7 +1446,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1460,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1486,7 +1487,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1501,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1576,7 +1577,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1591,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1667,7 +1668,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1682,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1750,7 +1751,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1765,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1817,7 +1818,7 @@ fn indexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1832,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1883,7 +1884,7 @@ fn reindexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1898,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1928,7 +1929,7 @@ fn reindexing_geo(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1943,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
@ -1996,7 +1997,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2011,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -10,6 +10,7 @@ use bumpalo::Bump;
use criterion::BenchmarkId;
use memmap2::Mmap;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@ -110,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index {
&document_changes,
EmbeddingConfigs::default(),
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -136,6 +136,14 @@ pub struct File {
}
impl File {
pub fn from_parts(path: PathBuf, file: Option<NamedTempFile>) -> Self {
Self { path, file }
}
pub fn into_parts(self) -> (PathBuf, Option<NamedTempFile>) {
(self.path, self.file)
}
pub fn dry_file() -> Result<Self> {
Ok(Self { path: PathBuf::new(), file: None })
}

View File

@ -10,6 +10,7 @@ use either::Either;
use fuzzers::Operation;
use milli::documents::mmap_from_objects;
use milli::heed::EnvOpenOptions;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig};
use milli::vector::EmbeddingConfigs;
@ -128,7 +129,7 @@ fn main() {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -143,7 +144,7 @@ fn main() {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -13,6 +13,9 @@ license.workspace = true
[dependencies]
anyhow = "1.0.86"
bincode = "1.3.3"
bumpalo = "3.16.0"
bumparaw-collections = "0.1.2"
convert_case = "0.6.0"
csv = "1.3.0"
derive_builder = "0.20.0"
dump = { path = "../dump" }
@ -21,8 +24,8 @@ file-store = { path = "../file-store" }
flate2 = "1.0.30"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
memmap2 = "0.9.4"
page_size = "0.6.0"
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
rayon = "1.10.0"
roaring = { version = "0.10.7", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
@ -30,7 +33,6 @@ serde_json = { version = "1.0.120", features = ["preserve_order"] }
synchronoise = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
memmap2 = "0.9.4"
time = { version = "0.3.36", features = [
"serde-well-known",
"formatting",
@ -40,7 +42,6 @@ time = { version = "0.3.36", features = [
tracing = "0.1.40"
ureq = "2.10.0"
uuid = { version = "1.10.0", features = ["serde", "v4"] }
bumpalo = "3.16.0"
[dev-dependencies]
arroy = "0.5.0"

View File

@ -22,8 +22,7 @@ use std::ffi::OsStr;
use std::fmt;
use std::fs::{self, File};
use std::io::BufWriter;
use std::sync::atomic::{self, AtomicU64};
use std::time::Duration;
use std::sync::atomic::Ordering;
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
@ -32,16 +31,17 @@ use meilisearch_types::batches::BatchId;
use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::progress::Progress;
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
use meilisearch_types::milli::update::{IndexDocumentsMethod, Settings as MilliSettings};
use meilisearch_types::milli::update::{
DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings,
};
use meilisearch_types::milli::vector::parsed_vectors::{
ExplicitVectors, VectorOrArrayOfVectors, RESERVED_VECTORS_FIELD_NAME,
};
use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
use meilisearch_types::tasks::{
Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress,
};
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
use roaring::RoaringBitmap;
use time::macros::format_description;
@ -49,6 +49,13 @@ use time::OffsetDateTime;
use uuid::Uuid;
use crate::autobatcher::{self, BatchKind};
use crate::processing::{
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress,
DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress,
DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress,
SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
UpdateIndexProgress, VariableNameStep,
};
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
use crate::{Error, IndexScheduler, Result, TaskId};
@ -559,11 +566,12 @@ impl IndexScheduler {
/// The list of tasks that were processed. The metadata of each task in the returned
/// list is updated accordingly, with the exception of the its date fields
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
pub(crate) fn process_batch(
&self,
batch: Batch,
current_batch: &mut ProcessingBatch,
progress: Progress,
) -> Result<Vec<Task>> {
#[cfg(test)]
{
@ -583,8 +591,13 @@ impl IndexScheduler {
};
let rtxn = self.env.read_txn()?;
let mut canceled_tasks =
self.cancel_matched_tasks(&rtxn, task.uid, current_batch, matched_tasks)?;
let mut canceled_tasks = self.cancel_matched_tasks(
&rtxn,
task.uid,
current_batch,
matched_tasks,
&progress,
)?;
task.status = Status::Succeeded;
match &mut task.details {
@ -615,7 +628,8 @@ impl IndexScheduler {
}
let mut wtxn = self.env.write_txn()?;
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
let mut deleted_tasks =
self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
wtxn.commit()?;
for task in tasks.iter_mut() {
@ -641,6 +655,8 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::SnapshotCreation(mut tasks) => {
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
fs::create_dir_all(&self.snapshots_path)?;
let temp_snapshot_dir = tempfile::tempdir()?;
@ -661,6 +677,7 @@ impl IndexScheduler {
// two read operations as the task processing is synchronous.
// 2.1 First copy the LMDB env of the index-scheduler
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
let dst = temp_snapshot_dir.path().join("tasks");
fs::create_dir_all(&dst)?;
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
@ -673,18 +690,29 @@ impl IndexScheduler {
fs::create_dir_all(&update_files_dir)?;
// 2.4 Only copy the update files of the enqueued tasks
for task_id in self.get_status(&rtxn, Status::Enqueued)? {
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
let enqueued = self.get_status(&rtxn, Status::Enqueued)?;
let (atomic, update_file_progress) =
AtomicUpdateFileStep::new(enqueued.len() as u32);
progress.update_progress(update_file_progress);
for task_id in enqueued {
let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
if let Some(content_uuid) = task.content_uuid() {
let src = self.file_store.get_update_path(content_uuid);
let dst = update_files_dir.join(content_uuid.to_string());
fs::copy(src, dst)?;
}
atomic.fetch_add(1, Ordering::Relaxed);
}
// 3. Snapshot every indexes
for result in self.index_mapper.index_mapping.iter(&rtxn)? {
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
let index_mapping = self.index_mapper.index_mapping;
let nb_indexes = index_mapping.len(&rtxn)? as u32;
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
let (name, uuid) = result?;
progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes));
let index = self.index_mapper.index(&rtxn, name)?;
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
fs::create_dir_all(&dst)?;
@ -696,6 +724,7 @@ impl IndexScheduler {
drop(rtxn);
// 4. Snapshot the auth LMDB env
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
let dst = temp_snapshot_dir.path().join("auth");
fs::create_dir_all(&dst)?;
// TODO We can't use the open_auth_store_env function here but we should
@ -708,6 +737,7 @@ impl IndexScheduler {
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
// 5. Copy and tarball the flat snapshot
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
// 5.1 Find the original name of the database
// TODO find a better way to get this path
let mut base_path = self.env.path().to_owned();
@ -740,6 +770,7 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::Dump(mut task) => {
progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
let started_at = OffsetDateTime::now_utc();
let (keys, instance_uid) =
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
@ -750,6 +781,7 @@ impl IndexScheduler {
let dump = dump::DumpWriter::new(*instance_uid)?;
// 1. dump the keys
progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
let mut dump_keys = dump.create_keys()?;
for key in keys {
dump_keys.push_key(key)?;
@ -759,7 +791,13 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?;
// 2. dump the tasks
progress.update_progress(DumpCreationProgress::DumpTheTasks);
let mut dump_tasks = dump.create_tasks_queue()?;
let (atomic, update_task_progress) =
AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32);
progress.update_progress(update_task_progress);
for ret in self.all_tasks.iter(&rtxn)? {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
@ -809,11 +847,22 @@ impl IndexScheduler {
dump_content_file.flush()?;
}
}
atomic.fetch_add(1, Ordering::Relaxed);
}
dump_tasks.flush()?;
// 3. Dump the indexes
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
let mut count = 0;
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
progress.update_progress(VariableNameStep::new(
uid.to_string(),
count,
nb_indexes,
));
count += 1;
let rtxn = index.read_txn()?;
let metadata = IndexMetadata {
uid: uid.to_owned(),
@ -833,6 +882,12 @@ impl IndexScheduler {
.embedding_configs(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
let nb_documents = index
.number_of_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
as u32;
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
progress.update_progress(update_document_progress);
let documents = index
.all_documents(&rtxn)
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
@ -902,6 +957,7 @@ impl IndexScheduler {
}
index_dumper.push_document(&document)?;
atomic.fetch_add(1, Ordering::Relaxed);
}
// 3.2. Dump the settings
@ -916,6 +972,7 @@ impl IndexScheduler {
})?;
// 4. Dump experimental feature settings
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
let features = self.features().runtime_features();
dump.create_experimental_features(features)?;
@ -926,6 +983,7 @@ impl IndexScheduler {
if self.must_stop_processing.get() {
return Err(Error::AbortedTask);
}
progress.update_progress(DumpCreationProgress::CompressTheDump);
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
let file = File::create(path)?;
dump.persist_to(BufWriter::new(file))?;
@ -951,7 +1009,7 @@ impl IndexScheduler {
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
let mut index_wtxn = index.write_txn()?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
{
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
@ -985,6 +1043,8 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexCreation { index_uid, primary_key, task } => {
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
let wtxn = self.env.write_txn()?;
if self.index_mapper.exists(&wtxn, &index_uid)? {
return Err(Error::IndexAlreadyExists(index_uid));
@ -994,9 +1054,11 @@ impl IndexScheduler {
self.process_batch(
Batch::IndexUpdate { index_uid, primary_key, task },
current_batch,
progress,
)
}
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
let rtxn = self.env.read_txn()?;
let index = self.index_mapper.index(&rtxn, &index_uid)?;
@ -1049,6 +1111,7 @@ impl IndexScheduler {
Ok(vec![task])
}
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
let wtxn = self.env.write_txn()?;
// it's possible that the index doesn't exist
@ -1082,6 +1145,8 @@ impl IndexScheduler {
Ok(tasks)
}
Batch::IndexSwap { mut task } => {
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
let mut wtxn = self.env.write_txn()?;
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
swaps
@ -1108,8 +1173,20 @@ impl IndexScheduler {
));
}
}
for swap in swaps {
self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
for (step, swap) in swaps.iter().enumerate() {
progress.update_progress(VariableNameStep::new(
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
step as u32,
swaps.len() as u32,
));
self.apply_index_swap(
&mut wtxn,
&progress,
task.uid,
&swap.indexes.0,
&swap.indexes.1,
)?;
}
wtxn.commit()?;
task.status = Status::Succeeded;
@ -1119,7 +1196,15 @@ impl IndexScheduler {
}
/// Swap the index `lhs` with the index `rhs`.
fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
fn apply_index_swap(
&self,
wtxn: &mut RwTxn,
progress: &Progress,
task_id: u32,
lhs: &str,
rhs: &str,
) -> Result<()> {
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
// 1. Verify that both lhs and rhs are existing indexes
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
if !index_lhs_exists {
@ -1137,14 +1222,21 @@ impl IndexScheduler {
index_rhs_task_ids.remove_range(task_id..);
// 3. before_name -> new_name in the task's KindWithContent
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
progress.update_progress(task_progress);
for task_id in tasks_to_update {
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
swap_index_uid_in_task(&mut task, (lhs, rhs));
self.all_tasks.put(wtxn, &task_id, &task)?;
atomic.fetch_add(1, Ordering::Relaxed);
}
// 4. remove the task from indexuid = before_name
// 5. add the task to indexuid = after_name
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
self.update_index(wtxn, lhs, |lhs_tasks| {
*lhs_tasks -= &index_lhs_task_ids;
*lhs_tasks |= &index_rhs_task_ids;
@ -1166,7 +1258,7 @@ impl IndexScheduler {
/// The list of processed tasks.
#[tracing::instrument(
level = "trace",
skip(self, index_wtxn, index),
skip(self, index_wtxn, index, progress),
target = "indexing::scheduler"
)]
fn apply_index_operation<'i>(
@ -1174,44 +1266,12 @@ impl IndexScheduler {
index_wtxn: &mut RwTxn<'i>,
index: &'i Index,
operation: IndexOperation,
progress: Progress,
) -> Result<Vec<Task>> {
let indexer_alloc = Bump::new();
let started_processing_at = std::time::Instant::now();
let secs_since_started_processing_at = AtomicU64::new(0);
const PRINT_SECS_DELTA: u64 = 5;
let processing_tasks = self.processing_tasks.clone();
let must_stop_processing = self.must_stop_processing.clone();
let send_progress = |progress| {
let now = std::time::Instant::now();
let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed);
let previous = started_processing_at + Duration::from_secs(elapsed);
let elapsed = now - previous;
if elapsed.as_secs() < PRINT_SECS_DELTA {
return;
}
secs_since_started_processing_at
.store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed);
let TaskProgress {
current_step,
finished_steps,
total_steps,
finished_substeps,
total_substeps,
} = processing_tasks.write().unwrap().update_progress(progress);
tracing::info!(
current_step,
finished_steps,
total_steps,
finished_substeps,
total_substeps
);
};
match operation {
IndexOperation::DocumentClear { index_uid, mut tasks } => {
@ -1243,6 +1303,7 @@ impl IndexScheduler {
operations,
mut tasks,
} => {
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
// to a fresh thread.
@ -1298,6 +1359,7 @@ impl IndexScheduler {
}
};
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
let (document_changes, operation_stats, primary_key) = indexer
.into_changes(
&indexer_alloc,
@ -1306,13 +1368,13 @@ impl IndexScheduler {
primary_key.as_deref(),
&mut new_fields_ids_map,
&|| must_stop_processing.get(),
&send_progress,
progress.clone(),
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
let mut addition = 0;
let mut candidates_count = 0;
for (stats, task) in operation_stats.into_iter().zip(&mut tasks) {
addition += stats.document_count;
candidates_count += stats.document_count;
match stats.error {
Some(error) => {
task.status = Status::Failed;
@ -1342,6 +1404,7 @@ impl IndexScheduler {
}
}
progress.update_progress(DocumentOperationProgress::Indexing);
if tasks.iter().any(|res| res.error.is_none()) {
indexer::index(
index_wtxn,
@ -1354,16 +1417,25 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
&progress,
)
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
let addition = DocumentAdditionResult {
indexed_documents: candidates_count,
number_of_documents: index
.number_of_documents(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
};
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
Ok(tasks)
}
IndexOperation::DocumentEdition { index_uid, mut task } => {
progress.update_progress(DocumentEditionProgress::RetrievingConfig);
let (filter, code) = if let KindWithContent::DocumentEdition {
filter_expr,
context: _,
@ -1436,6 +1508,8 @@ impl IndexScheduler {
}
};
let candidates_count = candidates.len();
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
let document_changes = pool
.install(|| {
@ -1449,6 +1523,7 @@ impl IndexScheduler {
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
progress.update_progress(DocumentEditionProgress::Indexing);
indexer::index(
index_wtxn,
index,
@ -1460,11 +1535,18 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
&progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
let addition = DocumentAdditionResult {
indexed_documents: candidates_count,
number_of_documents: index
.number_of_documents(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
};
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
match result_count {
@ -1494,6 +1576,8 @@ impl IndexScheduler {
Ok(vec![task])
}
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
let mut to_delete = RoaringBitmap::new();
let external_documents_ids = index.external_documents_ids();
@ -1584,7 +1668,9 @@ impl IndexScheduler {
}
};
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
let mut indexer = indexer::DocumentDeletion::new();
let candidates_count = to_delete.len();
indexer.delete_documents_by_docids(to_delete);
let document_changes = indexer.into_changes(&indexer_alloc, primary_key);
let embedders = index
@ -1592,6 +1678,7 @@ impl IndexScheduler {
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
let embedders = self.embedders(index_uid.clone(), embedders)?;
progress.update_progress(DocumentDeletionProgress::Indexing);
indexer::index(
index_wtxn,
index,
@ -1603,16 +1690,24 @@ impl IndexScheduler {
&document_changes,
embedders,
&|| must_stop_processing.get(),
&send_progress,
&progress,
)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
// tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
let addition = DocumentAdditionResult {
indexed_documents: candidates_count,
number_of_documents: index
.number_of_documents(index_wtxn)
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?,
};
tracing::info!(indexing_result = ?addition, processed_in = ?started_processing_at.elapsed(), "document indexing done");
}
Ok(tasks)
}
IndexOperation::Settings { index_uid, settings, mut tasks } => {
progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
let indexer_config = self.index_mapper.indexer_config();
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
@ -1626,6 +1721,7 @@ impl IndexScheduler {
task.status = Status::Succeeded;
}
progress.update_progress(SettingsProgress::ApplyTheSettings);
builder
.execute(
|indexing_step| tracing::debug!(update = ?indexing_step),
@ -1648,12 +1744,14 @@ impl IndexScheduler {
index_uid: index_uid.clone(),
tasks: cleared_tasks,
},
progress.clone(),
)?;
let settings_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
progress,
)?;
let mut tasks = settings_tasks;
@ -1670,15 +1768,18 @@ impl IndexScheduler {
&self,
wtxn: &mut RwTxn,
matched_tasks: &RoaringBitmap,
progress: &Progress,
) -> Result<RoaringBitmap> {
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
// 1. Remove from this list the tasks that we are not allowed to delete
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
let all_task_ids = self.all_task_ids(wtxn)?;
let mut to_delete_tasks = all_task_ids & matched_tasks;
to_delete_tasks -= processing_tasks;
to_delete_tasks -= enqueued_tasks;
to_delete_tasks -= &**processing_tasks;
to_delete_tasks -= &enqueued_tasks;
// 2. We now have a list of tasks to delete, delete them
@ -1689,6 +1790,8 @@ impl IndexScheduler {
// The tasks that have been removed *per batches*.
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
progress.update_progress(task_progress);
for task_id in to_delete_tasks.iter() {
let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
@ -1712,22 +1815,35 @@ impl IndexScheduler {
if let Some(batch_uid) = task.batch_uid {
affected_batches.entry(batch_uid).or_default().insert(task_id);
}
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
let (atomic_progress, task_progress) = AtomicTaskStep::new(
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
);
progress.update_progress(task_progress);
for index in affected_indexes.iter() {
self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for status in affected_statuses.iter() {
self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for kind in affected_kinds.iter() {
self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
progress.update_progress(TaskDeletionProgress::DeletingTasks);
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
progress.update_progress(task_progress);
for task in to_delete_tasks.iter() {
self.all_tasks.delete(wtxn, &task)?;
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
for canceled_by in affected_canceled_by {
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
@ -1739,6 +1855,9 @@ impl IndexScheduler {
}
}
}
progress.update_progress(TaskDeletionProgress::DeletingBatches);
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
progress.update_progress(batch_progress);
for (batch_id, to_delete_tasks) in affected_batches {
if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
tasks -= &to_delete_tasks;
@ -1780,6 +1899,7 @@ impl IndexScheduler {
}
}
}
atomic_progress.fetch_add(1, Ordering::Relaxed);
}
Ok(to_delete_tasks)
@ -1794,21 +1914,36 @@ impl IndexScheduler {
cancel_task_id: TaskId,
current_batch: &mut ProcessingBatch,
matched_tasks: &RoaringBitmap,
progress: &Progress,
) -> Result<Vec<Task>> {
progress.update_progress(TaskCancelationProgress::RetrievingTasks);
// 1. Remove from this list the tasks that we are not allowed to cancel
// Notice that only the _enqueued_ ones are cancelable and we should
// have already aborted the indexation of the _processing_ ones
let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?;
let tasks_to_cancel = cancelable_tasks & matched_tasks;
// 2. We now have a list of tasks to cancel, cancel them
let mut tasks = self.get_existing_tasks(rtxn, tasks_to_cancel.iter())?;
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);
// 2. We now have a list of tasks to cancel, cancel them
let mut tasks = self.get_existing_tasks(
rtxn,
tasks_to_cancel.iter().inspect(|_| {
task_progress.fetch_add(1, Ordering::Relaxed);
}),
)?;
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
progress.update_progress(progress_obj);
for task in tasks.iter_mut() {
task.status = Status::Canceled;
task.canceled_by = Some(cancel_task_id);
task.details = task.details.as_ref().map(|d| d.to_failed());
current_batch.processing(Some(task));
task_progress.fetch_add(1, Ordering::Relaxed);
}
Ok(tasks)

View File

@ -1,12 +1,13 @@
use std::fmt::Display;
use crate::TaskId;
use meilisearch_types::batches::BatchId;
use meilisearch_types::error::{Code, ErrorCode};
use meilisearch_types::tasks::{Kind, Status};
use meilisearch_types::{heed, milli};
use thiserror::Error;
use crate::TaskId;
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum DateField {
BeforeEnqueuedAt,
@ -103,7 +104,7 @@ pub enum Error {
)]
InvalidTaskCanceledBy { canceled_by: String },
#[error(
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 512 bytes."
"{index_uid} is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_), and can not be more than 400 bytes."
)]
InvalidIndexUid { index_uid: String },
#[error("Task `{0}` not found.")]

View File

@ -1,4 +1,5 @@
use std::collections::BTreeMap;
use std::env::VarError;
use std::path::Path;
use std::time::Duration;
@ -300,9 +301,19 @@ fn create_or_open_index(
enable_mdb_writemap: bool,
map_size: usize,
) -> Result<Index> {
use std::str::FromStr;
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
let max_readers = match std::env::var("MEILI_INDEX_MAX_READERS") {
Ok(value) => u32::from_str(&value).unwrap(),
Err(VarError::NotPresent) => 100 * 1024,
Err(VarError::NotUnicode(value)) => {
panic!("Invalid unicode for the `MEILI_INDEX_MAX_READERS` env var: {value:?}")
}
};
options.max_readers(max_readers);
if enable_mdb_writemap {
unsafe { options.flags(EnvFlags::WRITE_MAP) };
}

View File

@ -3,10 +3,6 @@ use std::sync::{Arc, RwLock};
use std::time::Duration;
use std::{fs, thread};
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, Result};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli;
@ -17,6 +13,11 @@ use time::OffsetDateTime;
use tracing::error;
use uuid::Uuid;
use self::index_map::IndexMap;
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
use crate::uuid_codec::UuidCodec;
use crate::{Error, Result};
mod index_map;
const INDEX_MAPPING: &str = "index-mapping";

View File

@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
pub fn snapshot_batch(batch: &Batch) -> String {
let mut snap = String::new();
let Batch { uid, details, stats, started_at, finished_at } = batch;
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
if let Some(finished_at) = finished_at {
assert!(finished_at > started_at);
}

View File

@ -26,6 +26,7 @@ mod index_mapper;
#[cfg(test)]
mod insta_snapshot;
mod lru;
mod processing;
mod utils;
pub mod uuid_codec;
@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
use meilisearch_types::task_view::TaskView;
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use processing::ProcessingTasks;
use rayon::current_num_threads;
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
use roaring::RoaringBitmap;
@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map
use uuid::Uuid;
use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch};
use crate::processing::{AtomicTaskStep, BatchProgress};
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 = I128<BE>;
@ -163,48 +165,6 @@ impl Query {
}
}
#[derive(Debug, Clone)]
pub struct ProcessingTasks {
batch: Option<ProcessingBatch>,
/// The list of task ids that are currently running.
processing: RoaringBitmap,
/// The progress on processing tasks
progress: Option<TaskProgress>,
}
impl ProcessingTasks {
/// Creates an empty `ProcessingTasks` struct.
fn new() -> ProcessingTasks {
ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None }
}
/// Stores the currently processing tasks, and the date time at which it started.
fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) {
self.batch = Some(processing_batch);
self.processing = processing;
}
fn update_progress(&mut self, progress: Progress) -> TaskProgress {
self.progress.get_or_insert_with(TaskProgress::default).update(progress)
}
/// Set the processing tasks to an empty list
fn stop_processing(&mut self) -> Self {
self.progress = None;
Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
}
}
/// Returns `true` if at least one of the currently processing tasks must be canceled.
fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
!self.processing.is_disjoint(canceled_tasks)
}
}
#[derive(Default, Clone, Debug)]
struct MustStopProcessing(Arc<AtomicBool>);
@ -813,7 +773,7 @@ impl IndexScheduler {
let mut batch_tasks = RoaringBitmap::new();
for batch_uid in batch_uids {
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
batch_tasks |= &processing_tasks;
batch_tasks |= &*processing_tasks;
} else {
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
}
@ -827,13 +787,13 @@ impl IndexScheduler {
match status {
// special case for Processing tasks
Status::Processing => {
status_tasks |= &processing_tasks;
status_tasks |= &*processing_tasks;
}
status => status_tasks |= &self.get_status(rtxn, *status)?,
};
}
if !status.contains(&Status::Processing) {
tasks -= &processing_tasks;
tasks -= &*processing_tasks;
}
tasks &= status_tasks;
}
@ -882,7 +842,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
tasks = {
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
(&tasks - &processing_tasks, &tasks & &processing_tasks);
(&tasks - &*processing_tasks, &tasks & &*processing_tasks);
// special case for Processing tasks
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
@ -1090,7 +1050,7 @@ impl IndexScheduler {
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
batches = {
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
(&batches - &processing.processing, &batches & &processing.processing);
(&batches - &*processing.processing, &batches & &*processing.processing);
// special case for Processing batches
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
@ -1606,7 +1566,8 @@ impl IndexScheduler {
// We reset the must_stop flag to be sure that we don't stop processing tasks
self.must_stop_processing.reset();
self.processing_tasks
let progress = self
.processing_tasks
.write()
.unwrap()
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
@ -1619,11 +1580,12 @@ impl IndexScheduler {
let res = {
let cloned_index_scheduler = self.private_clone();
let processing_batch = &mut processing_batch;
let progress = progress.clone();
std::thread::scope(|s| {
let handle = std::thread::Builder::new()
.name(String::from("batch-operation"))
.spawn_scoped(s, move || {
cloned_index_scheduler.process_batch(batch, processing_batch)
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
})
.unwrap();
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
@ -1636,6 +1598,7 @@ impl IndexScheduler {
#[cfg(test)]
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
progress.update_progress(BatchProgress::WritingTasksToDisk);
processing_batch.finished();
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
let mut canceled = RoaringBitmap::new();
@ -1645,12 +1608,15 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchSucceeded);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
progress.update_progress(task_progress_obj);
let mut success = 0;
let mut failure = 0;
let mut canceled_by = None;
#[allow(unused_variables)]
for (i, mut task) in tasks.into_iter().enumerate() {
task_progress.fetch_add(1, Ordering::Relaxed);
processing_batch.update(&mut task);
if task.status == Status::Canceled {
canceled.insert(task.uid);
@ -1718,8 +1684,12 @@ impl IndexScheduler {
Err(err) => {
#[cfg(test)]
self.breakpoint(Breakpoint::ProcessBatchFailed);
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
progress.update_progress(task_progress_obj);
let error: ResponseError = err.into();
for id in ids.iter() {
task_progress.fetch_add(1, Ordering::Relaxed);
let mut task = self
.get_task(&wtxn, id)
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?

View File

@ -0,0 +1,316 @@
use std::borrow::Cow;
use std::sync::Arc;
use enum_iterator::Sequence;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
use roaring::RoaringBitmap;
use crate::utils::ProcessingBatch;
#[derive(Clone)]
pub struct ProcessingTasks {
pub batch: Option<Arc<ProcessingBatch>>,
/// The list of task ids that are currently running.
pub processing: Arc<RoaringBitmap>,
/// The progress on processing tasks
pub progress: Option<Progress>,
}
impl ProcessingTasks {
/// Creates an empty `ProcessingTasks` struct.
pub fn new() -> ProcessingTasks {
ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
}
pub fn get_progress_view(&self) -> Option<ProgressView> {
Some(self.progress.as_ref()?.as_progress_view())
}
/// Stores the currently processing tasks, and the date time at which it started.
pub fn start_processing(
&mut self,
processing_batch: ProcessingBatch,
processing: RoaringBitmap,
) -> Progress {
self.batch = Some(Arc::new(processing_batch));
self.processing = Arc::new(processing);
let progress = Progress::default();
progress.update_progress(BatchProgress::ProcessingTasks);
self.progress = Some(progress.clone());
progress
}
/// Set the processing tasks to an empty list
pub fn stop_processing(&mut self) -> Self {
self.progress = None;
Self {
batch: std::mem::take(&mut self.batch),
processing: std::mem::take(&mut self.processing),
progress: None,
}
}
/// Returns `true` if at least one of the currently processing tasks must be canceled.
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
!self.processing.is_disjoint(canceled_tasks)
}
}
make_enum_progress! {
pub enum BatchProgress {
ProcessingTasks,
WritingTasksToDisk,
}
}
make_enum_progress! {
pub enum TaskCancelationProgress {
RetrievingTasks,
UpdatingTasks,
}
}
make_enum_progress! {
pub enum TaskDeletionProgress {
DeletingTasksDateTime,
DeletingTasksMetadata,
DeletingTasks,
DeletingBatches,
}
}
make_enum_progress! {
pub enum SnapshotCreationProgress {
StartTheSnapshotCreation,
SnapshotTheIndexScheduler,
SnapshotTheUpdateFiles,
SnapshotTheIndexes,
SnapshotTheApiKeys,
CreateTheTarball,
}
}
make_enum_progress! {
pub enum DumpCreationProgress {
StartTheDumpCreation,
DumpTheApiKeys,
DumpTheTasks,
DumpTheIndexes,
DumpTheExperimentalFeatures,
CompressTheDump,
}
}
make_enum_progress! {
pub enum CreateIndexProgress {
CreatingTheIndex,
}
}
make_enum_progress! {
pub enum UpdateIndexProgress {
UpdatingTheIndex,
}
}
make_enum_progress! {
pub enum DeleteIndexProgress {
DeletingTheIndex,
}
}
make_enum_progress! {
pub enum SwappingTheIndexes {
EnsuringCorrectnessOfTheSwap,
SwappingTheIndexes,
}
}
make_enum_progress! {
pub enum InnerSwappingTwoIndexes {
RetrieveTheTasks,
UpdateTheTasks,
UpdateTheIndexesMetadata,
}
}
make_enum_progress! {
pub enum DocumentOperationProgress {
RetrievingConfig,
ComputingDocumentChanges,
Indexing,
}
}
make_enum_progress! {
pub enum DocumentEditionProgress {
RetrievingConfig,
ComputingDocumentChanges,
Indexing,
}
}
make_enum_progress! {
pub enum DocumentDeletionProgress {
RetrievingConfig,
DeleteDocuments,
Indexing,
}
}
make_enum_progress! {
pub enum SettingsProgress {
RetrievingAndMergingTheSettings,
ApplyTheSettings,
}
}
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
pub struct VariableNameStep {
name: String,
current: u32,
total: u32,
}
impl VariableNameStep {
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
Self { name: name.into(), current, total }
}
}
impl Step for VariableNameStep {
fn name(&self) -> Cow<'static, str> {
self.name.clone().into()
}
fn current(&self) -> u32 {
self.current
}
fn total(&self) -> u32 {
self.total
}
}
#[cfg(test)]
mod test {
use std::sync::atomic::Ordering;
use meili_snap::{json_string, snapshot};
use super::*;
#[test]
fn one_level() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
}
],
"percentage": 0.0
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
}
#[test]
fn task_progress() {
let mut processing = ProcessingTasks::new();
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
let (atomic, tasks) = AtomicTaskStep::new(10);
processing.progress.as_ref().unwrap().update_progress(tasks);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
},
{
"currentStep": "task",
"finished": 0,
"total": 10
}
],
"percentage": 0.0
}
"#);
atomic.fetch_add(6, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "processing tasks",
"finished": 0,
"total": 2
},
{
"currentStep": "task",
"finished": 6,
"total": 10
}
],
"percentage": 30.000002
}
"#);
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
}
],
"percentage": 50.0
}
"#);
let (atomic, tasks) = AtomicTaskStep::new(5);
processing.progress.as_ref().unwrap().update_progress(tasks);
atomic.fetch_add(4, Ordering::Relaxed);
snapshot!(json_string!(processing.get_progress_view()), @r#"
{
"steps": [
{
"currentStep": "writing tasks to disk",
"finished": 1,
"total": 2
},
{
"currentStep": "task",
"finished": 4,
"total": 5
}
],
"percentage": 90.0
}
"#);
}
}
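The tests above only exercise the enum and atomic steps; as a rough sketch (hypothetical helper, not part of the diff), `VariableNameStep` can report progress over a list of dynamically named items such as indexes:

// Assumes the `Progress` and `VariableNameStep` items defined in this file.
fn report_per_index_progress(progress: &Progress, index_names: &[&str]) {
    for (i, name) in index_names.iter().enumerate() {
        // each index gets its own dynamically named step
        progress.update_progress(VariableNameStep::new(
            format!("index `{name}`"),
            i as u32,
            index_names.len() as u32,
        ));
        // ... the per-index work would happen here ...
    }
}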

View File

@ -134,6 +134,7 @@ impl ProcessingBatch {
pub fn to_batch(&self) -> Batch {
Batch {
uid: self.uid,
progress: None,
details: self.details.clone(),
stats: self.stats.clone(),
started_at: self.started_at,
@ -187,6 +188,7 @@ impl IndexScheduler {
&batch.uid,
&Batch {
uid: batch.uid,
progress: None,
details: batch.details,
stats: batch.stats,
started_at: batch.started_at,
@ -273,7 +275,9 @@ impl IndexScheduler {
.into_iter()
.map(|batch_id| {
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
Ok(processing.batch.as_ref().unwrap().to_batch())
let mut batch = processing.batch.as_ref().unwrap().to_batch();
batch.progress = processing.get_progress_view();
Ok(batch)
} else {
self.get_batch(rtxn, batch_id)
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))

View File

@ -24,8 +24,9 @@ flate2 = "1.0.30"
fst = "0.4.7"
memmap2 = "0.9.4"
milli = { path = "../milli" }
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
bumparaw-collections = "0.1.2"
roaring = { version = "0.10.7", features = ["serde"] }
rustc-hash = "2.1.0"
serde = { version = "1.0.204", features = ["derive"] }
serde-cs = "0.2.4"
serde_json = "1.0.120"

View File

@ -1,16 +1,16 @@
use milli::progress::ProgressView;
use serde::Serialize;
use time::{Duration, OffsetDateTime};
use crate::{
batches::{Batch, BatchId, BatchStats},
task_view::DetailsView,
tasks::serialize_duration,
};
use crate::batches::{Batch, BatchId, BatchStats};
use crate::task_view::DetailsView;
use crate::tasks::serialize_duration;
#[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct BatchView {
pub uid: BatchId,
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,
#[serde(serialize_with = "serialize_duration", default)]
@ -25,6 +25,7 @@ impl BatchView {
pub fn from_batch(batch: &Batch) -> Self {
Self {
uid: batch.uid,
progress: batch.progress.clone(),
details: batch.details.clone(),
stats: batch.stats.clone(),
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),

View File

@ -1,12 +1,11 @@
use std::collections::BTreeMap;
use milli::progress::ProgressView;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;
use crate::{
task_view::DetailsView,
tasks::{Kind, Status},
};
use crate::task_view::DetailsView;
use crate::tasks::{Kind, Status};
pub type BatchId = u32;
@ -15,6 +14,8 @@ pub type BatchId = u32;
pub struct Batch {
pub uid: BatchId,
#[serde(skip)]
pub progress: Option<ProgressView>,
pub details: DetailsView,
pub stats: BatchStats,

View File

@ -4,10 +4,11 @@ use std::io::{self, BufWriter};
use std::marker::PhantomData;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use memmap2::Mmap;
use milli::documents::Error;
use milli::Object;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde::de::{SeqAccess, Visitor};
use serde::{Deserialize, Deserializer};
use serde_json::error::Category;
@ -220,7 +221,7 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
let mut deserializer = serde_json::Deserializer::from_slice(&input);
let res = array_each(&mut deserializer, |obj: &RawValue| {
doc_alloc.reset();
let map = RawMap::from_raw_value(obj, &doc_alloc)?;
let map = RawMap::from_raw_value_and_hasher(obj, FxBuildHasher, &doc_alloc)?;
to_writer(&mut out, &map)
});
let count = match res {
@ -250,26 +251,25 @@ pub fn read_json(input: &File, output: impl io::Write) -> Result<u64> {
}
}
/// Reads NDJSON from file and write it in NDJSON in a file checking it along the way.
pub fn read_ndjson(input: &File, output: impl io::Write) -> Result<u64> {
/// Reads NDJSON from file and checks it.
pub fn read_ndjson(input: &File) -> Result<u64> {
// We memory map to be able to deserialize into a RawMap that
// does not allocate when possible and only materializes the first/top level.
let input = unsafe { Mmap::map(input).map_err(DocumentFormatError::Io)? };
let mut output = BufWriter::new(output);
let mut bump = Bump::with_capacity(1024 * 1024);
let mut count = 0;
for result in serde_json::Deserializer::from_slice(&input).into_iter() {
bump.reset();
count += 1;
result
.and_then(|raw: &RawValue| {
match result {
Ok(raw) => {
// try to deserialize as a map
let map = RawMap::from_raw_value(raw, &bump)?;
to_writer(&mut output, &map)
})
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, &bump)
.map_err(|e| DocumentFormatError::from((PayloadType::Ndjson, e)))?;
count += 1;
}
Err(e) => return Err(DocumentFormatError::from((PayloadType::Ndjson, e))),
}
}
Ok(count)
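With this rewrite, `read_ndjson` no longer rewrites the payload: it memory-maps the file, validates each document, and returns the count. A minimal caller sketch (hypothetical path; the `?` assumes a compatible error type):

// The NDJSON payload was already streamed to disk by the HTTP layer,
// so we only validate it in place and read back the document count.
let file = std::fs::File::open("/tmp/payload.ndjson")?;
let count = read_ndjson(&file)?;
println!("validated {count} documents");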

View File

@ -550,7 +550,7 @@ impl fmt::Display for deserr_codes::InvalidSimilarId {
"the value of `id` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes."
and can not be more than 511 bytes."
)
}
}

View File

@ -4,7 +4,6 @@ use std::fmt::{Display, Write};
use std::str::FromStr;
use enum_iterator::Sequence;
use milli::update::new::indexer::document_changes::Progress;
use milli::update::IndexDocumentsMethod;
use milli::Object;
use roaring::RoaringBitmap;
@ -41,62 +40,6 @@ pub struct Task {
pub kind: KindWithContent,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskProgress {
pub current_step: &'static str,
pub finished_steps: u16,
pub total_steps: u16,
pub finished_substeps: Option<u32>,
pub total_substeps: Option<u32>,
}
impl Default for TaskProgress {
fn default() -> Self {
Self::new()
}
}
impl TaskProgress {
pub fn new() -> Self {
Self {
current_step: "start",
finished_steps: 0,
total_steps: 1,
finished_substeps: None,
total_substeps: None,
}
}
pub fn update(&mut self, progress: Progress) -> TaskProgress {
if self.finished_steps > progress.finished_steps {
return *self;
}
if self.current_step != progress.step_name {
self.current_step = progress.step_name
}
self.total_steps = progress.total_steps;
if self.finished_steps < progress.finished_steps {
self.finished_substeps = None;
self.total_substeps = None;
}
self.finished_steps = progress.finished_steps;
if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep {
if let Some(task_finished_substeps) = self.finished_substeps {
if task_finished_substeps > finished_substeps {
return *self;
}
}
self.finished_substeps = Some(finished_substeps);
self.total_substeps = Some(total_substeps);
}
*self
}
}
impl Task {
pub fn index_uid(&self) -> Option<&str> {
use KindWithContent::*;

View File

@ -129,6 +129,11 @@ async fn try_main() -> anyhow::Result<()> {
print_launch_resume(&opt, analytics.clone(), config_read_from);
tokio::spawn(async move {
tokio::signal::ctrl_c().await.unwrap();
std::process::exit(130);
});
run_http(index_scheduler, auth_controller, opt, log_handle, Arc::new(analytics)).await?;
Ok(())

View File

@ -1,18 +1,18 @@
use actix_web::{
web::{self, Data},
HttpResponse,
};
use actix_web::web::{self, Data};
use actix_web::HttpResponse;
use deserr::actix_web::AwebQueryParameter;
use index_scheduler::{IndexScheduler, Query};
use meilisearch_types::{
batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError,
keys::actions,
};
use meilisearch_types::batch_view::BatchView;
use meilisearch_types::batches::BatchId;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler};
use super::{tasks::TasksFilterQuery, ActionPolicy};
use super::tasks::TasksFilterQuery;
use super::ActionPolicy;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches))))

View File

@ -1,5 +1,5 @@
use std::collections::HashSet;
use std::io::ErrorKind;
use std::io::{ErrorKind, Seek as _};
use std::marker::PhantomData;
use actix_web::http::header::CONTENT_TYPE;
@ -572,7 +572,7 @@ async fn document_addition(
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
body: Payload,
method: IndexDocumentsMethod,
task_id: Option<TaskId>,
dry_run: bool,
@ -609,54 +609,60 @@ async fn document_addition(
};
let (uuid, mut update_file) = index_scheduler.create_update_file(dry_run)?;
let documents_count = match format {
PayloadType::Ndjson => {
let (path, file) = update_file.into_parts();
let file = match file {
Some(file) => {
let (file, path) = file.into_parts();
let mut file = copy_body_to_file(file, body, format).await?;
file.rewind().map_err(|e| {
index_scheduler::Error::FileStore(file_store::Error::IoError(e))
})?;
Some(tempfile::NamedTempFile::from_parts(file, path))
}
None => None,
};
let temp_file = match tempfile() {
Ok(file) => file,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = file.as_ref().map_or(Ok(0), |ntf| {
read_ndjson(ntf.as_file()).map_err(MeilisearchHttpError::DocumentFormat)
})?;
let update_file = file_store::File::from_parts(path, file);
update_file.persist()?;
Ok(documents_count)
})
.await?;
Ok(documents_count)
}
PayloadType::Json | PayloadType::Csv { delimiter: _ } => {
let temp_file = match tempfile() {
Ok(file) => file,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
};
let read_file = copy_body_to_file(temp_file, body, format).await?;
tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, &mut update_file, delimiter)?
}
PayloadType::Ndjson => {
unreachable!("We already wrote the user content into the update file")
}
};
// we NEED to persist the file here because we moved the `update_file` into another task.
update_file.persist()?;
Ok(documents_count)
})
.await
}
};
let async_file = File::from_std(temp_file);
let mut buffer = BufWriter::new(async_file);
let mut buffer_write_size: usize = 0;
while let Some(result) = body.next().await {
let byte = result?;
if byte.is_empty() && buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
match buffer.write_all(&byte).await {
Ok(()) => buffer_write_size += 1,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
}
}
if let Err(e) = buffer.flush().await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
if buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
let read_file = buffer.into_inner().into_std().await;
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, &mut update_file)?,
PayloadType::Csv { delimiter } => read_csv(&read_file, &mut update_file, delimiter)?,
PayloadType::Ndjson => read_ndjson(&read_file, &mut update_file)?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.
update_file.persist()?;
Ok(documents_count)
})
.await;
let documents_count = match documents_count {
Ok(Ok(documents_count)) => documents_count,
// in this case the file could not possibly have been persisted.
@ -703,6 +709,39 @@ async fn document_addition(
Ok(task.into())
}
async fn copy_body_to_file(
output: std::fs::File,
mut body: Payload,
format: PayloadType,
) -> Result<std::fs::File, MeilisearchHttpError> {
let async_file = File::from_std(output);
let mut buffer = BufWriter::new(async_file);
let mut buffer_write_size: usize = 0;
while let Some(result) = body.next().await {
let byte = result?;
if byte.is_empty() && buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
match buffer.write_all(&byte).await {
Ok(()) => buffer_write_size += 1,
Err(e) => return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e)))),
}
}
if let Err(e) = buffer.flush().await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
if buffer_write_size == 0 {
return Err(MeilisearchHttpError::MissingPayload(format));
}
if let Err(e) = buffer.seek(std::io::SeekFrom::Start(0)).await {
return Err(MeilisearchHttpError::Payload(ReceivePayload(Box::new(e))));
}
let read_file = buffer.into_inner().into_std().await;
Ok(read_file)
}
pub async fn delete_documents_batch(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_DELETE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,

View File

@ -284,6 +284,7 @@ async fn test_summarized_document_addition_or_update() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
@ -314,6 +315,7 @@ async fn test_summarized_document_addition_or_update() {
@r#"
{
"uid": 1,
"progress": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
@ -349,6 +351,7 @@ async fn test_summarized_delete_documents_by_batch() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"providedIds": 3,
"deletedDocuments": 0
@ -380,6 +383,7 @@ async fn test_summarized_delete_documents_by_batch() {
@r#"
{
"uid": 2,
"progress": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
@ -416,6 +420,7 @@ async fn test_summarized_delete_documents_by_filter() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
@ -448,6 +453,7 @@ async fn test_summarized_delete_documents_by_filter() {
@r#"
{
"uid": 2,
"progress": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
@ -480,6 +486,7 @@ async fn test_summarized_delete_documents_by_filter() {
@r#"
{
"uid": 4,
"progress": null,
"details": {
"providedIds": 0,
"deletedDocuments": 0,
@ -516,6 +523,7 @@ async fn test_summarized_delete_document_by_id() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
@ -547,6 +555,7 @@ async fn test_summarized_delete_document_by_id() {
@r#"
{
"uid": 2,
"progress": null,
"details": {
"providedIds": 1,
"deletedDocuments": 0
@ -594,6 +603,7 @@ async fn test_summarized_settings_update() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"displayedAttributes": [
"doggos",
@ -638,6 +648,7 @@ async fn test_summarized_index_creation() {
@r#"
{
"uid": 0,
"progress": null,
"details": {},
"stats": {
"totalNbTasks": 1,
@ -665,6 +676,7 @@ async fn test_summarized_index_creation() {
@r#"
{
"uid": 1,
"progress": null,
"details": {
"primaryKey": "doggos"
},
@ -809,6 +821,7 @@ async fn test_summarized_index_update() {
@r#"
{
"uid": 0,
"progress": null,
"details": {},
"stats": {
"totalNbTasks": 1,
@ -836,6 +849,7 @@ async fn test_summarized_index_update() {
@r#"
{
"uid": 1,
"progress": null,
"details": {
"primaryKey": "bones"
},
@ -868,6 +882,7 @@ async fn test_summarized_index_update() {
@r#"
{
"uid": 3,
"progress": null,
"details": {},
"stats": {
"totalNbTasks": 1,
@ -895,6 +910,7 @@ async fn test_summarized_index_update() {
@r#"
{
"uid": 4,
"progress": null,
"details": {
"primaryKey": "bones"
},
@ -932,6 +948,7 @@ async fn test_summarized_index_swap() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"swaps": [
{
@ -972,6 +989,7 @@ async fn test_summarized_index_swap() {
@r#"
{
"uid": 3,
"progress": null,
"details": {
"swaps": [
{
@ -1014,6 +1032,7 @@ async fn test_summarized_batch_cancelation() {
@r#"
{
"uid": 1,
"progress": null,
"details": {
"matchedTasks": 1,
"canceledTasks": 0,
@ -1051,6 +1070,7 @@ async fn test_summarized_batch_deletion() {
@r#"
{
"uid": 1,
"progress": null,
"details": {
"matchedTasks": 1,
"deletedTasks": 1,
@ -1084,6 +1104,7 @@ async fn test_summarized_dump_creation() {
@r#"
{
"uid": 0,
"progress": null,
"details": {
"dumpUid": "[dumpUid]"
},

View File

@ -1264,15 +1264,18 @@ async fn error_add_documents_bad_document_id() {
let server = Server::new().await;
let index = server.index("test");
index.create(Some("docid")).await;
// unsupported characters
let documents = json!([
{
"docid": "foo & bar",
"content": "foobar"
}
]);
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = index.get_task(1).await;
let (value, _code) = index.add_documents(documents, None).await;
index.wait_task(value.uid()).await;
let (response, code) = index.get_task(value.uid()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
@ -1288,7 +1291,81 @@ async fn error_add_documents_bad_document_id() {
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"message": "Document identifier `\"foo & bar\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// More than 512 bytes
let documents = json!([
{
"docid": "a".repeat(600),
"content": "foobar"
}
]);
let (value, _code) = index.add_documents(documents, None).await;
index.wait_task(value.uid()).await;
let (response, code) = index.get_task(value.uid()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 2,
"batchUid": 2,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"
},
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
// Exactly 512 bytes
let documents = json!([
{
"docid": "a".repeat(512),
"content": "foobar"
}
]);
let (value, _code) = index.add_documents(documents, None).await;
index.wait_task(value.uid()).await;
let (response, code) = index.get_task(value.uid()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 3,
"batchUid": 3,
"indexUid": "test",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 0
},
"error": {
"message": "Document identifier `\"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_document_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_id"

View File

@ -172,7 +172,7 @@ async fn error_update_documents_bad_document_id() {
assert_eq!(
response["error"]["message"],
json!(
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes."#
r#"Document identifier `"foo & bar"` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes."#
)
);
assert_eq!(response["error"]["code"], json!("invalid_document_id"));

View File

@ -57,6 +57,116 @@ async fn simple_facet_search() {
assert_eq!(response["facetHits"].as_array().unwrap().len(), 1);
}
#[actix_rt::test]
async fn simple_facet_search_on_movies() {
let server = Server::new().await;
let index = server.index("test");
let documents = json!([
{
"id": 1,
"title": "Carol",
"genres": [
"Romance",
"Drama"
],
"color": [
"red"
],
"platforms": [
"MacOS",
"Linux",
"Windows"
]
},
{
"id": 2,
"title": "Wonder Woman",
"genres": [
"Action",
"Adventure"
],
"color": [
"green"
],
"platforms": [
"MacOS"
]
},
{
"id": 3,
"title": "Life of Pi",
"genres": [
"Adventure",
"Drama"
],
"color": [
"blue"
],
"platforms": [
"Windows"
]
},
{
"id": 4,
"title": "Mad Max: Fury Road",
"genres": [
"Adventure",
"Science Fiction"
],
"color": [
"red"
],
"platforms": [
"MacOS",
"Linux"
]
},
{
"id": 5,
"title": "Moana",
"genres": [
"Fantasy",
"Action"
],
"color": [
"red"
],
"platforms": [
"Windows"
]
},
{
"id": 6,
"title": "Philadelphia",
"genres": [
"Drama"
],
"color": [
"blue"
],
"platforms": [
"MacOS",
"Linux",
"Windows"
]
}
]);
let (response, code) =
index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, _code) = index.add_documents(documents, None).await;
index.wait_task(response.uid()).await;
let (response, code) =
index.facet_search(json!({"facetQuery": "", "facetName": "genres", "q": "" })).await;
assert_eq!(code, 200, "{}", response);
snapshot!(response["facetHits"], @r###"[{"value":"Action","count":2},{"value":"Adventure","count":3},{"value":"Drama","count":3},{"value":"Fantasy","count":1},{"value":"Romance","count":1},{"value":"Science Fiction","count":1}]"###);
}
#[actix_rt::test]
async fn advanced_facet_search() {
let server = Server::new().await;

View File

@ -79,7 +79,7 @@ async fn similar_bad_id() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"
@ -172,7 +172,7 @@ async fn similar_invalid_id() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 512 bytes.",
"message": "Invalid value at `.id`: the value of `id` is invalid. A document identifier can be of type integer or string, only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), and can not be more than 511 bytes.",
"code": "invalid_similar_id",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_id"

View File

@ -10,12 +10,15 @@ license.workspace = true
[dependencies]
anyhow = "1.0.86"
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }
clap = { version = "4.5.9", features = ["derive"] }
dump = { path = "../dump" }
file-store = { path = "../file-store" }
indexmap = {version = "2.7.0", features = ["serde"]}
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
serde = { version = "1.0.209", features = ["derive"] }
serde_json = {version = "1.0.133", features = ["preserve_order"]}
tempfile = "3.14.0"
time = { version = "0.3.36", features = ["formatting", "parsing", "alloc"] }
uuid = { version = "1.10.0", features = ["v4"], default-features = false }
arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", tag = "DO-NOT-DELETE-upgrade-v04-to-v05" }

View File

@ -73,7 +73,7 @@ enum Command {
///
/// Supported upgrade paths:
///
/// - v1.9.x -> v1.10.x -> v1.11.x
/// - v1.9.x -> v1.10.x -> v1.11.x -> v1.12.x
OfflineUpgrade {
#[arg(long)]
target_version: String,

View File

@ -1,13 +1,14 @@
mod v1_10;
mod v1_11;
mod v1_12;
mod v1_9;
use std::path::{Path, PathBuf};
use anyhow::{bail, Context};
use meilisearch_types::versioning::create_version_file;
use v1_10::v1_9_to_v1_10;
use v1_12::v1_11_to_v1_12;
use crate::upgrade::v1_11::v1_10_to_v1_11;
@ -22,6 +23,7 @@ impl OfflineUpgrade {
let upgrade_list = [
(v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"),
(v1_10_to_v1_11, "1", "11", "0"),
(v1_11_to_v1_12, "1", "12", "0"),
];
let (current_major, current_minor, current_patch) = &self.current_version;
@ -33,6 +35,7 @@ impl OfflineUpgrade {
) {
("1", "9", _) => 0,
("1", "10", _) => 1,
("1", "11", _) => 2,
_ => {
bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10")
}
@ -43,6 +46,7 @@ impl OfflineUpgrade {
let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
("1", "10", _) => 0,
("1", "11", _) => 1,
("1", "12", _) => 2,
(major, _, _) if major.starts_with('v') => {
bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.")
}

View File

@ -1,18 +1,13 @@
use anyhow::bail;
use std::path::Path;
use anyhow::Context;
use meilisearch_types::{
heed::{
types::{SerdeJson, Str},
Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
},
milli::index::{db_name, main_key},
};
use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
use anyhow::{bail, Context};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
use meilisearch_types::milli::index::{db_name, main_key};
use super::v1_9;
use crate::uuid_codec::UuidCodec;
use crate::{try_opening_database, try_opening_poly_database};
pub type FieldDistribution = std::collections::BTreeMap<String, u64>;

View File

@ -7,12 +7,12 @@
use std::path::Path;
use anyhow::Context;
use meilisearch_types::{
heed::{types::Str, Database, EnvOpenOptions},
milli::index::db_name,
};
use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, EnvOpenOptions};
use meilisearch_types::milli::index::db_name;
use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
use crate::uuid_codec::UuidCodec;
use crate::{try_opening_database, try_opening_poly_database};
pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
println!("Upgrading from v1.10.0 to v1.11.0");

View File

@ -0,0 +1,79 @@
//! The breaking changes that happened between v1.11 and v1.12 are:
//! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900
use std::io::BufWriter;
use std::path::Path;
use anyhow::Context;
use file_store::FileStore;
use indexmap::IndexMap;
use meilisearch_types::milli::documents::DocumentsBatchReader;
use serde_json::value::RawValue;
use tempfile::NamedTempFile;
pub fn v1_11_to_v1_12(db_path: &Path) -> anyhow::Result<()> {
println!("Upgrading from v1.11.0 to v1.12.0");
convert_update_files(db_path)?;
Ok(())
}
/// Convert the update files from OBKV to ndjson format.
///
/// 1) List all the update files using the file store.
/// 2) For each update file, read the update file into a DocumentsBatchReader.
/// 3) For each document in the update file, convert the document to a JSON object.
/// 4) Write the JSON object to a tmp file in the update files directory.
/// 5) Persist the tmp file replacing the old update file.
fn convert_update_files(db_path: &Path) -> anyhow::Result<()> {
let update_files_dir_path = db_path.join("update_files");
let file_store = FileStore::new(&update_files_dir_path).with_context(|| {
format!("while creating file store for update files dir {update_files_dir_path:?}")
})?;
for uuid in file_store.all_uuids().context("while retrieving uuids from file store")? {
let uuid = uuid.context("while retrieving uuid from file store")?;
let update_file_path = file_store.get_update_path(uuid);
let update_file = file_store
.get_update(uuid)
.with_context(|| format!("while getting update file for uuid {uuid:?}"))?;
let mut file =
NamedTempFile::new_in(&update_files_dir_path).map(BufWriter::new).with_context(
|| format!("while creating bufwriter for update file {update_file_path:?}"),
)?;
let reader = DocumentsBatchReader::from_reader(update_file).with_context(|| {
format!("while creating documents batch reader for update file {update_file_path:?}")
})?;
let (mut cursor, index) = reader.into_cursor_and_fields_index();
while let Some(document) = cursor.next_document().with_context(|| {
format!(
"while reading documents from batch reader for update file {update_file_path:?}"
)
})? {
let mut json_document = IndexMap::new();
for (fid, value) in document {
let field_name = index
.name(fid)
.with_context(|| format!("while getting field name for fid {fid} for update file {update_file_path:?}"))?;
let value: &RawValue = serde_json::from_slice(value)?;
json_document.insert(field_name, value);
}
serde_json::to_writer(&mut file, &json_document)?;
}
let file = file.into_inner().map_err(|e| e.into_error()).context(format!(
"while flushing update file bufwriter for update file {update_file_path:?}"
))?;
let _ = file
// atomically replace the obkv file with the rewritten NDJSON file
.persist(&update_file_path)
.with_context(|| format!("while persisting update file {update_file_path:?}"))?;
}
Ok(())
}
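To make the target format concrete, a small sketch (made-up fields and a hypothetical `writer`, not part of the diff) of how one document ends up serialized in the rewritten update file:

// Every OBKV document becomes a plain JSON object written sequentially
// into the new update file, e.g. {"id":1,"title":"Carol","genres":["Romance","Drama"]}
let doc = serde_json::json!({ "id": 1, "title": "Carol", "genres": ["Romance", "Drama"] });
serde_json::to_writer(&mut writer, &doc)?;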

View File

@ -91,8 +91,8 @@ ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"
hashbrown = "0.15.0"
raw-collections = { git = "https://github.com/meilisearch/raw-collections.git", version = "0.1.0" }
bumpalo = "3.16.0"
bumparaw-collections = "0.1.2"
thread_local = "1.1.8"
allocator-api2 = "0.2.18"
rustc-hash = "2.0.0"

View File

@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool {
pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
if document_id.is_empty()
|| document_id.len() > 512
|| document_id.len() >= 512
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
{
None
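The comparison change tightens the accepted length by one byte, matching the error messages updated to 511 elsewhere in this diff. A quick sketch of the new boundary:

// 511 bytes is now the longest accepted identifier; 512 bytes is rejected.
assert!(validate_document_id_str(&"a".repeat(511)).is_some());
assert!(validate_document_id_str(&"a".repeat(512)).is_none());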

View File

@ -114,7 +114,7 @@ pub enum UserError {
"Document identifier `{}` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes.", .document_id.to_string()
and can not be more than 511 bytes.", .document_id.to_string()
)]
InvalidDocumentId { document_id: Value },
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]

View File

@ -1734,6 +1734,7 @@ pub(crate) mod tests {
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::settings::InnerIndexSettings;
use crate::update::{
@ -1810,7 +1811,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@ -1829,7 +1830,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
})
.unwrap()?;
@ -1901,7 +1902,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@ -1920,7 +1921,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
})
.unwrap()?;
@ -1982,7 +1983,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2001,7 +2002,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| should_abort.load(Relaxed),
&|_| (),
&Progress::default(),
)
})
.unwrap()

View File

@ -31,6 +31,7 @@ pub mod vector;
#[macro_use]
pub mod snapshot_tests;
mod fieldids_weights_map;
pub mod progress;
use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};

View File

@ -0,0 +1,152 @@
use std::any::TypeId;
use std::borrow::Cow;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use serde::Serialize;
pub trait Step: 'static + Send + Sync {
fn name(&self) -> Cow<'static, str>;
fn current(&self) -> u32;
fn total(&self) -> u32;
}
#[derive(Clone, Default)]
pub struct Progress {
steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
}
impl Progress {
pub fn update_progress<P: Step>(&self, sub_progress: P) {
let mut steps = self.steps.write().unwrap();
let step_type = TypeId::of::<P>();
if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
steps.truncate(idx);
}
steps.push((step_type, Box::new(sub_progress)));
}
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
pub fn as_progress_view(&self) -> ProgressView {
let steps = self.steps.read().unwrap();
let mut percentage = 0.0;
let mut prev_factors = 1.0;
let mut step_view = Vec::with_capacity(steps.len());
for (_, step) in steps.iter() {
prev_factors *= step.total() as f32;
percentage += step.current() as f32 / prev_factors;
step_view.push(ProgressStepView {
current_step: step.name(),
finished: step.current(),
total: step.total(),
});
}
ProgressView { steps: step_view, percentage: percentage * 100.0 }
}
}
/// This trait lets you use the AtomicSubStep defined right below.
/// The name must be a constant that never changes, but that can't be enforced by the type system because it would make the trait non-object-safe.
/// By forcing the Default trait + the &'static str, we make it harder to misuse the trait.
pub trait NamedStep: 'static + Send + Sync + Default {
fn name(&self) -> &'static str;
}
/// Structure to quickly define steps that need very fast, lockless updates of their current value.
/// You can use this struct if:
/// - The name of the step doesn't change
/// - The total number of steps doesn't change
pub struct AtomicSubStep<Name: NamedStep> {
unit_name: Name,
current: Arc<AtomicU32>,
total: u32,
}
impl<Name: NamedStep> AtomicSubStep<Name> {
pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
let current = Arc::new(AtomicU32::new(0));
(current.clone(), Self { current, total, unit_name: Name::default() })
}
}
impl<Name: NamedStep> Step for AtomicSubStep<Name> {
fn name(&self) -> Cow<'static, str> {
self.unit_name.name().into()
}
fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
self.total
}
}
#[macro_export]
macro_rules! make_enum_progress {
($visibility:vis enum $name:ident { $($variant:ident,)+ }) => {
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[allow(clippy::enum_variant_names)]
$visibility enum $name {
$($variant),+
}
impl Step for $name {
fn name(&self) -> Cow<'static, str> {
use convert_case::Casing;
match self {
$(
$name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into()
),+
}
}
fn current(&self) -> u32 {
*self as u32
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
};
}
#[macro_export]
macro_rules! make_atomic_progress {
($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => {
#[derive(Default, Debug, Clone, Copy)]
pub struct $struct_name {}
impl NamedStep for $struct_name {
fn name(&self) -> &'static str {
$step_name
}
}
pub type $atomic_struct_name = AtomicSubStep<$struct_name>;
};
}
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressView {
pub steps: Vec<ProgressStepView>,
pub percentage: f32,
}
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressStepView {
pub current_step: Cow<'static, str>,
pub finished: u32,
pub total: u32,
}
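As a rough usage sketch (not part of the diff; it assumes `enum_iterator::Sequence` is in scope alongside the items above), an enum step and an atomic sub-step combine into a two-level report:

make_enum_progress! {
    pub enum ExampleProgress {
        PreparingData,
        WritingData,
    }
}

fn run(progress: &Progress) {
    // coarse step first, then a counter for the fine-grained work
    progress.update_progress(ExampleProgress::PreparingData);
    let (processed, step) = AtomicDocumentStep::new(1_000);
    progress.update_progress(step);
    for _ in 0..1_000 {
        processed.fetch_add(1, Ordering::Relaxed);
    }
    progress.update_progress(ExampleProgress::WritingData);
}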

View File

@ -3,12 +3,13 @@ use std::collections::BTreeMap;
use std::fmt::{self, Debug};
use bumpalo::Bump;
use bumparaw_collections::{RawMap, RawVec, Value};
use liquid::model::{
ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
Value as LiquidValue,
};
use liquid::{ObjectView, ValueView};
use raw_collections::{RawMap, RawVec};
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc
}
impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
fn as_debug(&self) -> &dyn fmt::Debug {
fn as_debug(&self) -> &dyn Debug {
self
}
fn render(&self) -> liquid::model::DisplayCow<'_> {
@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
}
}
#[derive(Debug)]
struct ParseableValue<'doc> {
value: raw_collections::Value<'doc>,
value: Value<'doc, FxBuildHasher>,
}
impl<'doc> ParseableValue<'doc> {
pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap();
Self { value }
}
@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> {
}
// transparent newtype for implementing ValueView
#[repr(transparent)]
#[derive(Debug)]
struct ParseableMap<'doc>(RawMap<'doc>);
#[repr(transparent)]
struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>);
// transparent newtype for implementing ValueView
#[repr(transparent)]
#[derive(Debug)]
#[repr(transparent)]
struct ParseableArray<'doc>(RawVec<'doc>);
impl<'doc> ParseableMap<'doc> {
pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> {
pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> {
// SAFETY: repr(transparent)
unsafe { &*(map as *const RawMap as *const Self) }
unsafe { &*(map as *const RawMap<FxBuildHasher> as *const Self) }
}
}
@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn render(&self) -> DisplayCow<'_> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.render(),
Value::Bool(v) => v.render(),
@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn source(&self) -> DisplayCow<'_> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.source(),
Value::Bool(v) => ValueView::source(v),
@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn type_name(&self) -> &'static str {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.type_name(),
Value::Bool(v) => v.type_name(),
@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn query_state(&self, state: State) -> bool {
use raw_collections::Value;
use bumparaw_collections::Value;
match &self.value {
Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
Value::Bool(v) => ValueView::query_state(v, state),
@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn to_kstr(&self) -> KStringCow<'_> {
use raw_collections::Value;
use bumparaw_collections::Value;
match &self.value {
Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
Value::Bool(v) => ValueView::to_kstr(v),
@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn to_value(&self) -> LiquidValue {
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil,
Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
Value::Number(number) => match number {
raw_collections::value::Number::PosInt(number) => {
Number::PosInt(number) => {
let number: i64 = match (*number).try_into() {
Ok(number) => number,
Err(_) => {
@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
};
LiquidValue::Scalar(ScalarCow::new(number))
}
raw_collections::value::Number::NegInt(number) => {
LiquidValue::Scalar(ScalarCow::new(*number))
}
raw_collections::value::Number::Finite(number) => {
LiquidValue::Scalar(ScalarCow::new(*number))
}
Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
},
Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
Value::Number(number) => match number {
@ -576,34 +580,41 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn is_scalar(&self) -> bool {
use raw_collections::Value;
use bumparaw_collections::Value;
matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
}
fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
if let raw_collections::Value::Array(array) = &self.value {
if let Value::Array(array) = &self.value {
return Some(ParseableArray::as_parseable(array) as _);
}
None
}
fn is_array(&self) -> bool {
matches!(&self.value, raw_collections::Value::Array(_))
matches!(&self.value, bumparaw_collections::Value::Array(_))
}
fn as_object(&self) -> Option<&dyn ObjectView> {
if let raw_collections::Value::Object(object) = &self.value {
if let Value::Object(object) = &self.value {
return Some(ParseableMap::as_parseable(object) as _);
}
None
}
fn is_object(&self) -> bool {
matches!(&self.value, raw_collections::Value::Object(_))
matches!(&self.value, bumparaw_collections::Value::Object(_))
}
fn is_nil(&self) -> bool {
matches!(&self.value, raw_collections::Value::Null)
matches!(&self.value, bumparaw_collections::Value::Null)
}
}
impl Debug for ParseableValue<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ParseableValue").field("value", &self.value).finish()
}
}

View File

@ -38,6 +38,16 @@ pub struct RenderPromptError {
pub fault: FaultSource,
}
impl RenderPromptError {
pub(crate) fn missing_context_with_external_docid(
external_docid: String,
inner: liquid::Error,
) -> RenderPromptError {
Self {
kind: RenderPromptErrorKind::MissingContextWithExternalDocid(external_docid, inner),
fault: FaultSource::User,
}
}
pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError {
Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User }
}
@ -47,6 +57,8 @@ impl RenderPromptError {
pub enum RenderPromptErrorKind {
#[error("missing field in document: {0}")]
MissingContext(liquid::Error),
#[error("missing field in document `{0}`: {1}")]
MissingContextWithExternalDocid(String, liquid::Error),
}
impl From<RenderPromptError> for crate::Error {

View File

@ -119,6 +119,7 @@ impl Prompt {
'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents
>(
&self,
external_docid: &str,
document: impl crate::update::new::document::Document<'a> + Debug,
field_id_map: &RefCell<GlobalFieldsIdsMap>,
doc_alloc: &'doc Bump,
@ -130,9 +131,12 @@ impl Prompt {
self.max_bytes.unwrap_or_else(default_max_bytes).get(),
doc_alloc,
);
self.template
.render_to(&mut rendered, &context)
.map_err(RenderPromptError::missing_context)?;
self.template.render_to(&mut rendered, &context).map_err(|liquid_error| {
RenderPromptError::missing_context_with_external_docid(
external_docid.to_owned(),
liquid_error,
)
})?;
Ok(std::str::from_utf8(rendered.into_bump_slice())
.expect("render can only write UTF-8 because all inputs and processing preserve utf-8"))
}

View File

@ -5,6 +5,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -766,6 +766,7 @@ mod tests {
use crate::documents::mmap_from_objects;
use crate::index::tests::TempIndex;
use crate::index::IndexEmbeddingConfig;
use crate::progress::Progress;
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::Setting;
@ -1964,7 +1965,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2148,7 +2149,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2163,7 +2164,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2210,7 +2211,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2225,7 +2226,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2263,7 +2264,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2278,7 +2279,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2315,7 +2316,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2330,7 +2331,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2369,7 +2370,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2384,7 +2385,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2428,7 +2429,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2443,7 +2444,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2480,7 +2481,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2495,7 +2496,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2532,7 +2533,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2547,7 +2548,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2726,7 +2727,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2741,7 +2742,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2785,7 +2786,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2800,7 +2801,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2841,7 +2842,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2856,7 +2857,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();

View File

@ -1,7 +1,8 @@
use std::collections::{BTreeMap, BTreeSet};
use bumparaw_collections::RawMap;
use heed::RoTxn;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use super::vector_document::VectorDocument;
@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue);
#[derive(Debug)]
pub struct Versions<'doc> {
data: RawMap<'doc>,
data: RawMap<'doc, FxBuildHasher>,
}
impl<'doc> Versions<'doc> {
pub fn multiple(
mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
) -> Result<Option<Self>> {
let Some(data) = versions.next() else { return Ok(None) };
let mut data = data?;
@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> {
Ok(Some(Self::single(data)))
}
pub fn single(version: RawMap<'doc>) -> Self {
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
Self { data: version }
}

View File

@ -69,12 +69,12 @@ use std::io::BufReader;
use std::{io, iter, mem};
use bumpalo::Bump;
use bumparaw_collections::bbbul::{BitPacker, BitPacker4x};
use bumparaw_collections::map::FrozenMap;
use bumparaw_collections::{Bbbul, FrozenBbbul};
use grenad::ReaderCursor;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
use raw_collections::bbbul::{BitPacker, BitPacker4x};
use raw_collections::map::FrozenMap;
use raw_collections::{Bbbul, FrozenBbbul};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;
@ -177,12 +177,12 @@ impl<'extractor> BalancedCaches<'extractor> {
Ok(())
}
pub fn freeze(&mut self) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
pub fn freeze(&mut self, source_id: usize) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
match &mut self.caches {
InnerCaches::Normal(NormalCaches { caches }) => caches
.iter_mut()
.enumerate()
.map(|(bucket, map)| {
.map(|(bucket_id, map)| {
// safety: we are transmuting the Bbbul into a FrozenBbbul
// that is the same size.
let map = unsafe {
@ -201,14 +201,19 @@ impl<'extractor> BalancedCaches<'extractor> {
>,
>(map)
};
Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() })
Ok(FrozenCache {
source_id,
bucket_id,
cache: FrozenMap::new(map),
spilled: Vec::new(),
})
})
.collect(),
InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches
.iter_mut()
.zip(mem::take(spilled_entries))
.enumerate()
.map(|(bucket, (map, sorter))| {
.map(|(bucket_id, (map, sorter))| {
let spilled = sorter
.into_reader_cursors()?
.into_iter()
@ -234,7 +239,7 @@ impl<'extractor> BalancedCaches<'extractor> {
>,
>(map)
};
Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled })
Ok(FrozenCache { source_id, bucket_id, cache: FrozenMap::new(map), spilled })
})
.collect(),
}
@ -440,7 +445,8 @@ fn spill_entry_to_sorter(
}
pub struct FrozenCache<'a, 'extractor> {
bucket: usize,
bucket_id: usize,
source_id: usize,
cache: FrozenMap<
'a,
'extractor,
@ -457,9 +463,9 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
let width = caches.first().map(BalancedCaches::buckets).unwrap_or(0);
let mut bucket_caches: Vec<_> = iter::repeat_with(Vec::new).take(width).collect();
for thread_cache in caches {
for frozen in thread_cache.freeze()? {
bucket_caches[frozen.bucket].push(frozen);
for (thread_index, thread_cache) in caches.iter_mut().enumerate() {
for frozen in thread_cache.freeze(thread_index)? {
bucket_caches[frozen.bucket_id].push(frozen);
}
}
@ -477,21 +483,16 @@ where
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
{
let mut maps = Vec::new();
let mut readers = Vec::new();
let mut current_bucket = None;
for FrozenCache { bucket, cache, ref mut spilled } in frozen {
assert_eq!(*current_bucket.get_or_insert(bucket), bucket);
maps.push(cache);
readers.append(spilled);
}
// First manage the spilled entries by looking into the HashMaps,
// merge them and mark them as dummy.
let mut heap = BinaryHeap::new();
for (source_index, source) in readers.into_iter().enumerate() {
let mut cursor = source.into_cursor()?;
if cursor.move_on_next()?.is_some() {
heap.push(Entry { cursor, source_index });
let mut current_bucket = None;
for FrozenCache { source_id, bucket_id, cache, spilled } in frozen {
assert_eq!(*current_bucket.get_or_insert(bucket_id), bucket_id);
maps.push((source_id, cache));
for reader in spilled {
let mut cursor = reader.into_cursor()?;
if cursor.move_on_next()?.is_some() {
heap.push(Entry { cursor, source_id });
}
}
}
@ -508,25 +509,29 @@ where
let mut output = DelAddRoaringBitmap::from_bytes(first_value)?;
while let Some(mut entry) = heap.peek_mut() {
if let Some((key, _value)) = entry.cursor.current() {
if first_key == key {
let new = DelAddRoaringBitmap::from_bytes(first_value)?;
output = output.merge(new);
// When we are done with the current value of this entry, make
// it move forward and let the heap reorganize itself (on drop)
if entry.cursor.move_on_next()?.is_none() {
PeekMut::pop(entry);
}
} else {
if let Some((key, value)) = entry.cursor.current() {
if first_key != key {
break;
}
let new = DelAddRoaringBitmap::from_bytes(value)?;
output = output.merge(new);
// When we are done with the current value of this entry, make
// it move forward and let the heap reorganize itself (on drop)
if entry.cursor.move_on_next()?.is_none() {
PeekMut::pop(entry);
}
}
}
// Once we merged all of the spilled bitmaps we must also
// fetch the entries from the non-spilled entries (the HashMaps).
for (map_index, map) in maps.iter_mut().enumerate() {
if first_entry.source_index != map_index {
for (source_id, map) in maps.iter_mut() {
debug_assert!(
!(map.get(first_key).is_some() && first_entry.source_id == *source_id),
"A thread should not have spiled a key that has been inserted in the cache"
);
if first_entry.source_id != *source_id {
if let Some(new) = map.get_mut(first_key) {
output.union_and_clear_bbbul(new);
}
@ -538,12 +543,12 @@ where
// Don't forget to put the first entry back into the heap.
if first_entry.cursor.move_on_next()?.is_some() {
heap.push(first_entry)
heap.push(first_entry);
}
}
// Then manage the content on the HashMap entries that weren't taken (mem::take).
while let Some(mut map) = maps.pop() {
while let Some((_, mut map)) = maps.pop() {
// Make sure we don't try to work with entries already managed by the spilled readers
let mut ordered_entries: Vec<_> =
map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
@ -553,7 +558,7 @@ where
let mut output = DelAddRoaringBitmap::empty();
output.union_and_clear_bbbul(bbbul);
for rhs in maps.iter_mut() {
for (_, rhs) in maps.iter_mut() {
if let Some(new) = rhs.get_mut(key) {
output.union_and_clear_bbbul(new);
}
@ -569,14 +574,14 @@ where
struct Entry<R> {
cursor: ReaderCursor<R>,
source_index: usize,
source_id: usize,
}
impl<R> Ord for Entry<R> {
fn cmp(&self, other: &Entry<R>) -> Ordering {
let skey = self.cursor.current().map(|(k, _)| k);
let okey = other.cursor.current().map(|(k, _)| k);
skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse()
skey.cmp(&okey).then(self.source_id.cmp(&other.source_id)).reverse()
}
}
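The merge above is a k-way merge: every spilled reader becomes an Entry whose Ord is reversed so Rust's max-heap BinaryHeap pops the smallest current key first, equal keys from other sources are folded in through PeekMut before their cursors advance, and the in-memory map coming from the same source_id is skipped because its entries were never spilled. A minimal, self-contained sketch of that pattern over plain sorted vectors; the Entry type and merge_sorted function here are illustrative stand-ins, not the crate's API:

use std::cmp::Ordering;
use std::collections::binary_heap::PeekMut;
use std::collections::BinaryHeap;

// One already-sorted source of (key, value) pairs, tagged with its origin.
struct Entry {
    current: Option<(u32, u64)>,
    rest: std::vec::IntoIter<(u32, u64)>,
    source_id: usize,
}

impl Entry {
    fn new(mut rest: std::vec::IntoIter<(u32, u64)>, source_id: usize) -> Self {
        let current = rest.next();
        Entry { current, rest, source_id }
    }
    fn advance(&mut self) {
        self.current = self.rest.next();
    }
}

impl Ord for Entry {
    fn cmp(&self, other: &Self) -> Ordering {
        let skey = self.current.map(|(k, _)| k);
        let okey = other.current.map(|(k, _)| k);
        // Reversed so the max-heap pops the smallest current key first.
        skey.cmp(&okey).then(self.source_id.cmp(&other.source_id)).reverse()
    }
}
impl PartialOrd for Entry { fn partial_cmp(&self, other: &Self) -> Option<Ordering> { Some(self.cmp(other)) } }
impl PartialEq for Entry { fn eq(&self, other: &Self) -> bool { self.cmp(other) == Ordering::Equal } }
impl Eq for Entry {}

// Merge several sorted sources, summing the values of identical keys.
fn merge_sorted(sources: Vec<Vec<(u32, u64)>>) -> Vec<(u32, u64)> {
    let mut heap = BinaryHeap::new();
    for (source_id, source) in sources.into_iter().enumerate() {
        let entry = Entry::new(source.into_iter(), source_id);
        if entry.current.is_some() {
            heap.push(entry);
        }
    }
    let mut out = Vec::new();
    while let Some(mut first) = heap.pop() {
        let (first_key, mut acc) = first.current.unwrap();
        while let Some(mut entry) = heap.peek_mut() {
            match entry.current {
                // Fold in every other source sitting on the same key, reading
                // *its* value rather than the first one (the bug fixed above).
                Some((key, value)) if key == first_key => {
                    acc += value;
                    entry.advance();
                    if entry.current.is_none() {
                        PeekMut::pop(entry);
                    }
                }
                _ => break,
            }
        }
        out.push((first_key, acc));
        first.advance();
        if first.current.is_some() {
            heap.push(first);
        }
    }
    out
}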

View File

@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
use crate::update::new::extract::perm_json_p;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str {
impl FacetedDocidsExtractor {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
pub fn run_extraction<
'pl,
'fid,
'indexer,
'index,
'extractor,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
sender: &FieldIdDocidFacetSender,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;

View File

@ -15,23 +15,22 @@ pub use geo::*;
pub use searchable::*;
pub use vectors::EmbeddingExtractor;
use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
use super::steps::Step;
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
use super::steps::IndexingStep;
use super::thread_local::{FullySend, ThreadLocal};
use crate::update::GrenadParameters;
use crate::Result;
pub trait DocidsExtractor {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync;
MSP: Fn() -> bool + Sync;
}
/// TODO move in permissive json pointer

View File

@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
pub struct WordDocidsExtractors;
impl WordDocidsExtractors {
pub fn run_extraction<
'pl,
'fid,
'indexer,
'index,
'extractor,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<WordDocidsCaches<'extractor>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;

View File

@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::BalancedCaches;
use super::DocidsExtractor;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
}
pub trait SearchableExtractor: Sized + Sync {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync {
}
impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
Self::run_extraction(
grenad_parameters,

View File

@ -176,9 +176,10 @@ pub fn tokenizer_builder<'a>(
#[cfg(test)]
mod test {
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use charabia::TokenizerBuilder;
use meili_snap::snapshot;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde_json::json;
use serde_json::value::RawValue;
@ -234,7 +235,7 @@ mod test {
let bump = Bump::new();
let document: &RawValue = serde_json::from_str(&document).unwrap();
let document = RawMap::from_raw_value(document, &bump).unwrap();
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap();
let document = Versions::single(document);
let document = DocumentFromVersions::new(&document);

View File

@ -130,6 +130,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
);
} else if new_vectors.regenerate {
let new_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
@ -139,6 +140,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
&context.doc_alloc,
)?;
let old_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
context.index,
@ -158,6 +160,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
}
} else if old_vectors.regenerate {
let old_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
@ -167,6 +170,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
&context.doc_alloc,
)?;
let new_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
context.index,
@ -216,6 +220,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
);
} else if new_vectors.regenerate {
let rendered = prompt.render_document(
insertion.external_document_id(),
insertion.inserted(),
context.new_fields_ids_map,
&context.doc_alloc,
@ -229,6 +234,7 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
}
} else {
let rendered = prompt.render_document(
insertion.external_document_id(),
insertion.inserted(),
context.new_fields_ids_map,
&context.doc_alloc,

View File

@ -103,6 +103,8 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_fst")]
pub fn merge_and_write(self, index: &Index, wtxn: &mut RwTxn, rtxn: &RoTxn) -> Result<()> {
tracing::trace!("merge facet strings for facet search: {:?}", self.registered_facets);
let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?;
let mut builder = grenad::MergerBuilder::new(MergeDeladdBtreesetString);
builder.extend(reader);
@ -118,12 +120,15 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?;
if current_field_id != Some(field_id) {
if let Some(fst_merger_builder) = fst_merger_builder {
if let (Some(current_field_id), Some(fst_merger_builder)) =
(current_field_id, fst_merger_builder)
{
let mmap = fst_merger_builder.build(&mut callback)?;
index
.facet_id_string_fst
.remap_data_type::<Bytes>()
.put(wtxn, &field_id, &mmap)?;
index.facet_id_string_fst.remap_data_type::<Bytes>().put(
wtxn,
&current_field_id,
&mmap,
)?;
}
fst = index.facet_id_string_fst.get(rtxn, &field_id)?;
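The facet FST regression came from the flush-on-key-change pattern: while scanning entries sorted by (field_id, string), the FST buffered for the previous field must be written under that previous field id, not under the field id that triggered the flush. A small, self-contained sketch of the pattern; the group_sorted function and its types are illustrative, not milli's API:

// Group values of a sorted (group_id, value) stream, flushing each buffered
// group under the id it was accumulated for.
fn group_sorted(entries: &[(u16, &str)]) -> Vec<(u16, Vec<String>)> {
    let mut out = Vec::new();
    let mut current_group: Option<u16> = None;
    let mut buffer: Vec<String> = Vec::new();
    for (group, value) in entries {
        if current_group != Some(*group) {
            if let Some(previous) = current_group.take() {
                // The fix above: flush under `previous`, not under the new `group`.
                out.push((previous, std::mem::take(&mut buffer)));
            }
            current_group = Some(*group);
        }
        buffer.push(value.to_string());
    }
    if let Some(previous) = current_group {
        out.push((previous, buffer));
    }
    out
}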

View File

@ -1,6 +1,8 @@
use std::ops::ControlFlow;
use bumpalo::Bump;
use bumparaw_collections::RawVec;
use rustc_hash::FxBuildHasher;
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
use serde_json::value::RawValue;
@ -360,7 +362,7 @@ impl<'a> DeserrRawValue<'a> {
}
pub struct DeserrRawVec<'a> {
vec: raw_collections::RawVec<'a>,
vec: RawVec<'a>,
alloc: &'a Bump,
}
@ -379,7 +381,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> {
}
pub struct DeserrRawVecIter<'a> {
it: raw_collections::vec::iter::IntoIter<'a>,
it: bumparaw_collections::vec::iter::IntoIter<'a>,
alloc: &'a Bump,
}
@ -393,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
}
pub struct DeserrRawMap<'a> {
map: raw_collections::RawMap<'a>,
map: bumparaw_collections::RawMap<'a, FxBuildHasher>,
alloc: &'a Bump,
}
@ -416,7 +418,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> {
}
pub struct DeserrRawMapIter<'a> {
it: raw_collections::map::iter::IntoIter<'a>,
it: bumparaw_collections::map::iter::IntoIter<'a>,
alloc: &'a Bump,
}
@ -615,7 +617,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> {
where
A: serde::de::SeqAccess<'de>,
{
let mut raw_vec = raw_collections::RawVec::new_in(self.alloc);
let mut raw_vec = RawVec::new_in(self.alloc);
while let Some(next) = seq.next_element()? {
raw_vec.push(next);
}

View File

@ -1,4 +1,5 @@
use std::cell::{Cell, RefCell};
use std::sync::atomic::Ordering;
use std::sync::{Arc, RwLock};
use bumpalo::Bump;
@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator;
use super::super::document_change::DocumentChange;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::progress::{AtomicDocumentStep, Progress};
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};
@ -133,10 +135,8 @@ pub struct IndexingContext<
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
> where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
pub index: &'index Index,
pub db_fields_ids_map: &'indexer FieldsIdsMap,
@ -144,7 +144,7 @@ pub struct IndexingContext<
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
pub must_stop_processing: &'indexer MSP,
pub send_progress: &'indexer SP,
pub progress: &'indexer Progress,
}
impl<
@ -152,18 +152,15 @@ impl<
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
> Copy
for IndexingContext<
'fid, // invariant lifetime of fields ids map
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
}
@ -172,18 +169,15 @@ impl<
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
> Clone
for IndexingContext<
'fid, // invariant lifetime of fields ids map
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
fn clone(&self) -> Self {
*self
@ -202,7 +196,6 @@ pub fn extract<
EX,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
document_changes: &DC,
extractor: &EX,
@ -213,18 +206,18 @@ pub fn extract<
doc_allocs,
fields_ids_map_store,
must_stop_processing,
send_progress,
}: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
progress,
}: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
datastore: &'data ThreadLocal<EX::Data>,
step: Step,
step: IndexingStep,
) -> Result<()>
where
EX: Extractor<'extractor>,
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
tracing::trace!("We are resetting the extractor allocators");
progress.update_progress(step);
// Clean up and reuse the extractor allocs
for extractor_alloc in extractor_allocs.iter_mut() {
tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
@ -232,9 +225,11 @@ where
}
let total_documents = document_changes.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
let pi = document_changes.iter(CHUNK_SIZE);
pi.enumerate().try_arc_for_each_try_init(
pi.try_arc_for_each_try_init(
|| {
DocumentChangeContext::new(
index,
@ -247,13 +242,10 @@ where
move |index_alloc| extractor.init_data(index_alloc),
)
},
|context, (finished_documents, items)| {
|context, items| {
if (must_stop_processing)() {
return Err(Arc::new(InternalError::AbortedIndexation.into()));
}
let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
(send_progress)(Progress::from_step_substep(step, finished_documents, total_documents));
// Clean up and reuse the document-specific allocator
context.doc_alloc.reset();
@ -264,6 +256,7 @@ where
});
let res = extractor.process(changes, context).map_err(Arc::new);
step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
// send back the doc_alloc in the pool
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
@ -271,32 +264,7 @@ where
res
},
)?;
(send_progress)(Progress::from_step_substep(step, total_documents, total_documents));
step.store(total_documents, Ordering::Relaxed);
Ok(())
}
pub struct Progress {
pub finished_steps: u16,
pub total_steps: u16,
pub step_name: &'static str,
pub finished_total_substep: Option<(u32, u32)>,
}
impl Progress {
pub fn from_step(step: Step) -> Self {
Self {
finished_steps: step.finished_steps(),
total_steps: Step::total_steps(),
step_name: step.name(),
finished_total_substep: None,
}
}
pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self {
Self {
finished_total_substep: Some((finished_substep, total_substep)),
..Progress::from_step(step)
}
}
}
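The callback-based Progress struct removed above gives way to a shared Progress handle plus an atomic per-step counter (AtomicDocumentStep) that worker threads bump with fetch_add and the progress route can read without locking. A rough stand-in for that counter using only std; the SubStep type and its methods are illustrative, not the crate's API:

use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

// Shared counter: clones go to the workers, the reader keeps one for polling.
struct SubStep {
    done: Arc<AtomicU32>,
    total: u32,
}

impl SubStep {
    fn new(total: u32) -> (Arc<AtomicU32>, Self) {
        let done = Arc::new(AtomicU32::new(0));
        (Arc::clone(&done), SubStep { done, total })
    }
    // Snapshot read by whoever renders the progress, e.g. "250/4000 documents".
    fn snapshot(&self) -> (u32, u32) {
        (self.done.load(Ordering::Relaxed), self.total)
    }
}

fn main() {
    let (counter, sub_step) = SubStep::new(4_000);
    // A worker thread would do this after finishing a chunk of documents:
    counter.fetch_add(250, Ordering::Relaxed);
    assert_eq!(sub_step.snapshot(), (250, 4_000));
}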

View File

@ -92,11 +92,12 @@ mod test {
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::tests::TempIndex;
use crate::progress::Progress;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, Extractor, IndexingContext,
};
use crate::update::new::indexer::DocumentDeletion;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::DocumentId;
@ -164,7 +165,7 @@ mod test {
doc_allocs: &doc_allocs,
fields_ids_map_store: &fields_ids_map_store,
must_stop_processing: &(|| false),
send_progress: &(|_progress| {}),
progress: &Progress::default(),
};
for _ in 0..3 {
@ -176,7 +177,7 @@ mod test {
context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
IndexingStep::ExtractingDocuments,
)
.unwrap();

View File

@ -1,19 +1,23 @@
use std::sync::atomic::Ordering;
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use hashbrown::hash_map::Entry;
use heed::RoTxn;
use memmap2::Mmap;
use raw_collections::RawMap;
use rayon::slice::ParallelSlice;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use serde_json::Deserializer;
use super::super::document_change::DocumentChange;
use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress};
use super::document_changes::{DocumentChangeContext, DocumentChanges};
use super::retrieve_or_guess_primary_key;
use crate::documents::PrimaryKey;
use crate::progress::{AtomicPayloadStep, Progress};
use crate::update::new::document::Versions;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::{Deletion, Insertion, Update};
use crate::update::{AvailableIds, IndexDocumentsMethod};
@ -44,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> {
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
pub fn into_changes<MSP, SP>(
pub fn into_changes<MSP>(
self,
indexer: &'pl Bump,
index: &Index,
@ -52,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> {
primary_key_from_op: Option<&'pl str>,
new_fields_ids_map: &mut FieldsIdsMap,
must_stop_processing: &MSP,
send_progress: &SP,
progress: Progress,
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
where
MSP: Fn() -> bool,
SP: Fn(Progress),
{
progress.update_progress(IndexingStep::PreparingPayloads);
let Self { operations, method } = self;
let documents_ids = index.documents_ids(rtxn)?;
@ -67,16 +71,14 @@ impl<'pl> DocumentOperation<'pl> {
let mut primary_key = None;
let payload_count = operations.len();
let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
progress.update_progress(progress_step);
for (payload_index, operation) in operations.into_iter().enumerate() {
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
send_progress(Progress::from_step_substep(
Step::PreparingPayloads,
payload_index as u32,
payload_count as u32,
));
step.store(payload_index as u32, Ordering::Relaxed);
let mut bytes = 0;
let result = match operation {
@ -117,12 +119,7 @@ impl<'pl> DocumentOperation<'pl> {
};
operations_stats.push(PayloadStats { document_count, bytes, error });
}
send_progress(Progress::from_step_substep(
Step::PreparingPayloads,
payload_count as u32,
payload_count as u32,
));
step.store(payload_count as u32, Ordering::Relaxed);
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
@ -166,8 +163,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
// Only guess the primary key if it is the first document
let retrieved_primary_key = if previous_offset == 0 {
let doc =
RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;
let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer)
.map(Some)
.map_err(UserError::SerdeJson)?;
let result = retrieve_or_guess_primary_key(
rtxn,
@ -545,8 +543,9 @@ impl MergeChanges for MergeDocumentForReplacement {
match operations.last() {
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(UserError::SerdeJson)?;
if is_new {
Ok(Some(DocumentChange::Insertion(Insertion::create(
@ -632,8 +631,9 @@ impl MergeChanges for MergeDocumentForUpdates {
}
};
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(UserError::SerdeJson)?;
Some(Versions::single(document))
}
@ -647,8 +647,9 @@ impl MergeChanges for MergeDocumentForUpdates {
};
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(UserError::SerdeJson)?;
Ok(document)
});
Versions::multiple(versions)?

View File

@ -4,7 +4,8 @@ use std::sync::{OnceLock, RwLock};
use std::thread::{self, Builder};
use big_s::S;
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
use bumparaw_collections::RawMap;
use document_changes::{extract, DocumentChanges, IndexingContext};
pub use document_deletion::DocumentDeletion;
pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
@ -13,7 +14,7 @@ use heed::{RoTxn, RwTxn};
use itertools::{merge_join_by, EitherOrBoth};
pub use partial_dump::PartialDump;
use rand::SeedableRng as _;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use time::OffsetDateTime;
pub use update_by_function::UpdateByFunction;
@ -21,7 +22,7 @@ use super::channel::*;
use super::extract::*;
use super::facet_search_builder::FacetSearchBuilder;
use super::merger::FacetFieldIdsDelta;
use super::steps::Step;
use super::steps::IndexingStep;
use super::thread_local::ThreadLocal;
use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
use super::words_prefix_docids::{
@ -32,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
use crate::facet::FacetType;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
use crate::progress::Progress;
use crate::proximity::ProximityPrecision;
use crate::update::del_add::DelAdd;
use crate::update::new::extract::EmbeddingExtractor;
@ -59,7 +61,7 @@ mod update_by_function;
///
/// TODO return stats
#[allow(clippy::too_many_arguments)] // clippy: 😝
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
pub fn index<'pl, 'indexer, 'index, DC, MSP>(
wtxn: &mut RwTxn,
index: &'index Index,
pool: &ThreadPoolNoAbort,
@ -70,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
document_changes: &DC,
embedders: EmbeddingConfigs,
must_stop_processing: &'indexer MSP,
send_progress: &'indexer SP,
progress: &'indexer Progress,
) -> Result<()>
where
DC: DocumentChanges<'pl>,
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
@ -124,7 +125,7 @@ where
doc_allocs: &doc_allocs,
fields_ids_map_store: &fields_ids_map_store,
must_stop_processing,
send_progress,
progress,
};
let mut index_embeddings = index.embedding_configs(wtxn)?;
@ -158,7 +159,7 @@ where
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
IndexingStep::ExtractingDocuments,
)?;
}
{
@ -190,7 +191,7 @@ where
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets
IndexingStep::ExtractingFacets
)?
};
@ -223,7 +224,7 @@ where
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWords
IndexingStep::ExtractingWords
)?
};
@ -301,7 +302,7 @@ where
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWordProximity,
IndexingStep::ExtractingWordProximity,
)?
};
@ -337,7 +338,7 @@ where
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingEmbeddings,
IndexingStep::ExtractingEmbeddings,
)?;
}
{
@ -370,7 +371,7 @@ where
indexing_context,
&mut extractor_allocs,
&datastore,
Step::WritingGeoPoints
IndexingStep::WritingGeoPoints
)?;
}
@ -382,9 +383,7 @@ where
&indexing_context.must_stop_processing,
)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
Result::Ok((facet_field_ids_delta, index_embeddings))
@ -484,7 +483,7 @@ where
)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
@ -497,10 +496,7 @@ where
break 'vectors;
}
(indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase,
));
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
let dimensions = *dimensions;
@ -516,21 +512,19 @@ where
index.put_embedding_configs(wtxn, index_embeddings)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
}
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::Finalizing));
indexing_context.progress.update_progress(IndexingStep::Finalizing);
Ok(()) as Result<_>
})?;
@ -776,7 +770,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
index: &Index,
new_fields_ids_map: &mut FieldsIdsMap,
primary_key_from_op: Option<&'a str>,
first_document: Option<RawMap<'a>>,
first_document: Option<RawMap<'a, FxBuildHasher>>,
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.

View File

@ -1,6 +1,8 @@
use std::ops::DerefMut;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use super::document_changes::{DocumentChangeContext, DocumentChanges};
@ -75,7 +77,7 @@ where
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
let external_document_id = external_document_id.to_de();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(InternalError::SerdeJson)?;
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));

View File

@ -1,8 +1,9 @@
use raw_collections::RawMap;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rayon::slice::ParallelSlice as _;
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;
use super::document_changes::DocumentChangeContext;
use super::DocumentChanges;
@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
if document_id != new_document_id {
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
} else {
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
.map_err(InternalError::SerdeJson)?;
let raw_new_doc = RawMap::from_raw_value_and_hasher(
raw_new_doc,
FxBuildHasher,
doc_alloc,
)
.map_err(InternalError::SerdeJson)?;
Ok(Some(DocumentChange::Update(Update::create(
docid,

View File

@ -1,8 +1,12 @@
use std::borrow::Cow;
use enum_iterator::Sequence;
use crate::progress::Step;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u16)]
pub enum Step {
#[repr(u8)]
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
ExtractingFacets,
@ -18,30 +22,31 @@ pub enum Step {
Finalizing,
}
impl Step {
pub fn name(&self) -> &'static str {
impl Step for IndexingStep {
fn name(&self) -> Cow<'static, str> {
match self {
Step::PreparingPayloads => "preparing update file",
Step::ExtractingDocuments => "extracting documents",
Step::ExtractingFacets => "extracting facets",
Step::ExtractingWords => "extracting words",
Step::ExtractingWordProximity => "extracting word proximity",
Step::ExtractingEmbeddings => "extracting embeddings",
Step::WritingGeoPoints => "writing geo points",
Step::WritingToDatabase => "writing to database",
Step::WaitingForExtractors => "waiting for extractors",
Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
Step::PostProcessingFacets => "post-processing facets",
Step::PostProcessingWords => "post-processing words",
Step::Finalizing => "finalizing",
IndexingStep::PreparingPayloads => "preparing update file",
IndexingStep::ExtractingDocuments => "extracting documents",
IndexingStep::ExtractingFacets => "extracting facets",
IndexingStep::ExtractingWords => "extracting words",
IndexingStep::ExtractingWordProximity => "extracting word proximity",
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
IndexingStep::WritingGeoPoints => "writing geo points",
IndexingStep::WritingToDatabase => "writing to database",
IndexingStep::WaitingForExtractors => "waiting for extractors",
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
IndexingStep::PostProcessingFacets => "post-processing facets",
IndexingStep::PostProcessingWords => "post-processing words",
IndexingStep::Finalizing => "finalizing",
}
.into()
}
pub fn finished_steps(self) -> u16 {
self as u16
fn current(&self) -> u32 {
*self as u32
}
pub const fn total_steps() -> u16 {
Self::CARDINALITY as u16
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
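With this change a step is no longer an ad-hoc struct but any type implementing the Step trait from crate::progress, which, as used here, exposes a name, the current index, and the total. A toy stand-in showing how such a value could be rendered, with the trait re-declared locally purely for illustration:

use std::borrow::Cow;

// Local stand-in for milli's `crate::progress::Step` trait as it is used above.
trait Step {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

// Hypothetical rendering, roughly what a progress report could show.
fn render(step: &dyn Step) -> String {
    format!("[{}/{}] {}", step.current(), step.total(), step.name())
}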

View File

@ -1,9 +1,10 @@
use std::collections::BTreeSet;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use deserr::{Deserr, IntoValue};
use heed::RoTxn;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde::Serialize;
use serde_json::value::RawValue;
@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> {
docid: DocumentId,
embedding_config: Vec<IndexEmbeddingConfig>,
index: &'t Index,
vectors_field: Option<RawMap<'t>>,
vectors_field: Option<RawMap<'t, FxBuildHasher>>,
rtxn: &'t RoTxn<'t>,
doc_alloc: &'t Bump,
}
@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> {
};
let vectors = document.vectors_field()?;
let vectors_field = match vectors {
Some(vectors) => {
Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?)
}
Some(vectors) => Some(
RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
.map_err(InternalError::SerdeJson)?,
),
None => None,
};
@ -220,7 +222,7 @@ fn entry_from_raw_value(
pub struct VectorDocumentFromVersions<'doc> {
external_document_id: &'doc str,
vectors: RawMap<'doc>,
vectors: RawMap<'doc, FxBuildHasher>,
embedders: &'doc EmbeddingConfigs,
}
@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
) -> Result<Option<Self>> {
let document = DocumentFromVersions::new(versions);
if let Some(vectors_field) = document.vectors_field()? {
let vectors =
RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump)
.map_err(UserError::SerdeJson)?;
Ok(Some(Self { external_document_id, vectors, embedders }))
} else {
Ok(None)

View File

@ -3,6 +3,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -7,6 +7,7 @@ use bumpalo::Bump;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -5,6 +5,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions;
use itertools::Itertools;
use maplit::hashset;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@ -326,7 +327,7 @@ fn criteria_ascdesc() {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -341,7 +342,7 @@ fn criteria_ascdesc() {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -3,6 +3,7 @@ use std::collections::BTreeSet;
use bumpalo::Bump;
use heed::EnvOpenOptions;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();

View File

@ -1,23 +1,56 @@
use std::collections::BTreeMap;
use std::time::Duration;
use anyhow::{bail, Context as _};
use tokio::process::Command;
use tokio::time;
use super::assets::Asset;
use super::client::Client;
use super::workload::Workload;
pub async fn kill(mut meilisearch: tokio::process::Child) {
if let Err(error) = meilisearch.kill().await {
tracing::warn!(
error = &error as &dyn std::error::Error,
"while terminating Meilisearch server"
)
let Some(id) = meilisearch.id() else { return };
match Command::new("kill").args(["--signal=TERM", &id.to_string()]).spawn() {
Ok(mut cmd) => {
let Err(error) = cmd.wait().await else { return };
tracing::warn!(
error = &error as &dyn std::error::Error,
"while awaiting the Meilisearch server kill"
);
}
Err(error) => {
tracing::warn!(
error = &error as &dyn std::error::Error,
"while terminating Meilisearch server with a kill -s TERM"
);
if let Err(error) = meilisearch.kill().await {
tracing::warn!(
error = &error as &dyn std::error::Error,
"while terminating Meilisearch server"
)
}
return;
}
};
match time::timeout(Duration::from_secs(5), meilisearch.wait()).await {
Ok(_) => (),
Err(_) => {
if let Err(error) = meilisearch.kill().await {
tracing::warn!(
error = &error as &dyn std::error::Error,
"while terminating Meilisearch server"
)
}
}
}
}
#[tracing::instrument]
pub async fn build() -> anyhow::Result<()> {
let mut command = tokio::process::Command::new("cargo");
let mut command = Command::new("cargo");
command.arg("build").arg("--release").arg("-p").arg("meilisearch");
command.kill_on_drop(true);
@ -37,17 +70,8 @@ pub async fn start(
master_key: Option<&str>,
workload: &Workload,
asset_folder: &str,
mut command: Command,
) -> anyhow::Result<tokio::process::Child> {
let mut command = tokio::process::Command::new("cargo");
command
.arg("run")
.arg("--release")
.arg("-p")
.arg("meilisearch")
.arg("--bin")
.arg("meilisearch")
.arg("--");
command.arg("--db-path").arg("./_xtask_benchmark.ms");
if let Some(master_key) = master_key {
command.arg("--master-key").arg(master_key);
@ -86,7 +110,7 @@ async fn wait_for_health(
return Ok(());
}
tokio::time::sleep(std::time::Duration::from_millis(500)).await;
time::sleep(Duration::from_millis(500)).await;
// check whether the Meilisearch instance exited early (cut the wait)
if let Some(exit_code) =
meilisearch.try_wait().context("cannot check Meilisearch server process status")?

View File

@ -86,6 +86,12 @@ pub struct BenchDeriveArgs {
/// The maximum time in seconds we allow for fetching the task queue before timing out.
#[arg(long, default_value_t = 60)]
tasks_queue_timeout_secs: u64,
/// The path to the binary to run.
///
/// If unspecified, runs `cargo run` after building Meilisearch with `cargo build`.
#[arg(long)]
binary_path: Option<PathBuf>,
}
pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
@ -170,6 +176,7 @@ pub fn run(args: BenchDeriveArgs) -> anyhow::Result<()> {
args.master_key.as_deref(),
workload,
&args,
args.binary_path.as_deref(),
)
.await?;

View File

@ -1,6 +1,7 @@
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{Seek as _, Write as _};
use std::path::Path;
use anyhow::{bail, Context as _};
use futures_util::TryStreamExt as _;
@ -85,13 +86,13 @@ pub async fn execute(
master_key: Option<&str>,
workload: Workload,
args: &BenchDeriveArgs,
binary_path: Option<&Path>,
) -> anyhow::Result<()> {
assets::fetch_assets(assets_client, &workload.assets, &args.asset_folder).await?;
let workload_uuid = dashboard_client.create_workload(invocation_uuid, &workload).await?;
let mut tasks = Vec::new();
for i in 0..workload.run_count {
tasks.push(
execute_run(
@ -102,6 +103,7 @@ pub async fn execute(
master_key,
&workload,
args,
binary_path,
i,
)
.await?,
@ -109,7 +111,6 @@ pub async fn execute(
}
let mut reports = Vec::with_capacity(workload.run_count as usize);
for task in tasks {
reports.push(
task.await
@ -133,13 +134,31 @@ async fn execute_run(
master_key: Option<&str>,
workload: &Workload,
args: &BenchDeriveArgs,
binary_path: Option<&Path>,
run_number: u16,
) -> anyhow::Result<tokio::task::JoinHandle<anyhow::Result<std::fs::File>>> {
meili_process::delete_db();
meili_process::build().await?;
let run_command = match binary_path {
Some(binary_path) => tokio::process::Command::new(binary_path),
None => {
meili_process::build().await?;
let mut command = tokio::process::Command::new("cargo");
command
.arg("run")
.arg("--release")
.arg("-p")
.arg("meilisearch")
.arg("--bin")
.arg("meilisearch")
.arg("--");
command
}
};
let meilisearch =
meili_process::start(meili_client, master_key, workload, &args.asset_folder).await?;
meili_process::start(meili_client, master_key, workload, &args.asset_folder, run_command)
.await?;
let processor = run_commands(
dashboard_client,