Do not create too many rayon tasks when processing the settings

Panic on serde_json errors instead of propagating them
Remove a log that would log too much
2025-12-10 06:35:43 +00:00 · 2025-01-29 17:02:06 +01:00 · 2025-01-29 10:13:02 +01:00 · 2025-01-28 21:31:01 +01:00 · 2025-01-28 21:31:01 +01:00 · 2025-01-28 21:31:01 +01:00
30 changed files with 475 additions and 232 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -496,7 +496,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2

 [[package]]
 name = "benchmarks"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "anyhow",
 "bumpalo",
@@ -689,7 +689,7 @@ dependencies = [

 [[package]]
 name = "build-info"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "anyhow",
 "time",
@@ -1664,7 +1664,7 @@ dependencies = [

 [[package]]
 name = "dump"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "anyhow",
 "big_s",
@@ -1876,7 +1876,7 @@ checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"

 [[package]]
 name = "file-store"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "tempfile",
 "thiserror",
@@ -1898,7 +1898,7 @@ dependencies = [

 [[package]]
 name = "filter-parser"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "insta",
 "nom",
@@ -1918,7 +1918,7 @@ dependencies = [

 [[package]]
 name = "flatten-serde-json"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "criterion",
 "serde_json",
@@ -2057,7 +2057,7 @@ dependencies = [

 [[package]]
 name = "fuzzers"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "arbitrary",
 "bumpalo",
@@ -2624,7 +2624,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d"

 [[package]]
 name = "index-scheduler"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "anyhow",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2822,7 +2822,7 @@ dependencies = [

 [[package]]
 name = "json-depth-checker"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "criterion",
 "serde_json",
@@ -3441,7 +3441,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771"

 [[package]]
 name = "meili-snap"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "insta",
 "md5",
@@ -3450,7 +3450,7 @@ dependencies = [

 [[package]]
 name = "meilisearch"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "actix-cors",
 "actix-http",
@@ -3540,7 +3540,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-auth"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "base64 0.22.1",
 "enum-iterator",
@@ -3559,7 +3559,7 @@ dependencies = [

 [[package]]
 name = "meilisearch-types"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "actix-web",
 "anyhow",
@@ -3592,7 +3592,7 @@ dependencies = [

 [[package]]
 name = "meilitool"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "anyhow",
 "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)",
@@ -3627,7 +3627,7 @@ dependencies = [

 [[package]]
 name = "milli"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "allocator-api2",
 "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -4083,7 +4083,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"

 [[package]]
 name = "permissive-json-pointer"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "big_s",
 "serde_json",
@@ -6486,7 +6486,7 @@ dependencies = [

 [[package]]
 name = "xtask"
-version = "1.12.4"
+version = "1.12.7"
 dependencies = [
 "anyhow",
 "build-info",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -22,7 +22,7 @@ members = [
 ]

 [workspace.package]
-version = "1.12.4"
+version = "1.12.7"
 authors = [
    "Quentin de Quelen <quentin@dequelen.me>",
    "Clément Renault <clement@meilisearch.com>",
--- a/crates/index-scheduler/src/batch.rs
+++ b/crates/index-scheduler/src/batch.rs
@@ -29,7 +29,7 @@ use bumpalo::Bump;
 use dump::IndexMetadata;
 use meilisearch_types::batches::BatchId;
 use meilisearch_types::heed::{RoTxn, RwTxn};
-use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
+use meilisearch_types::milli::documents::PrimaryKey;
 use meilisearch_types::milli::heed::CompactionOption;
 use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
@@ -819,6 +819,13 @@ impl IndexScheduler {
                        t.started_at = Some(started_at);
                        t.finished_at = Some(finished_at);
                    }
+
+                    // Patch the task to remove the batch uid, because as of v1.12.5 batches are not persisted.
+                    // This prevents the task from referencing *future* batches not actually associated with it.
+                    //
+                    // See <https://github.com/meilisearch/meilisearch/issues/5247> for details.
+                    t.batch_uid = None;
+
                    let mut dump_content_file = dump_tasks.push_task(&t.into())?;

                    // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
@@ -829,21 +836,20 @@ impl IndexScheduler {
                        if status == Status::Enqueued {
                            let content_file = self.file_store.get_update(content_file)?;

-                            let reader = DocumentsBatchReader::from_reader(content_file)
-                                .map_err(|e| Error::from_milli(e.into(), None))?;
-
-                            let (mut cursor, documents_batch_index) =
-                                reader.into_cursor_and_fields_index();
-
-                            while let Some(doc) = cursor
-                                .next_document()
-                                .map_err(|e| Error::from_milli(e.into(), None))?
+                            for document in
+                                serde_json::de::Deserializer::from_reader(content_file).into_iter()
                            {
-                                dump_content_file.push_document(
-                                    &obkv_to_object(doc, &documents_batch_index)
-                                        .map_err(|e| Error::from_milli(e, None))?,
-                                )?;
+                                let document = document
+                                    .map_err(|e| {
+                                        Error::from_milli(
+                                            milli::InternalError::SerdeJson(e).into(),
+                                            None,
+                                        )
+                                    })
+                                    .unwrap();
+                                dump_content_file.push_document(&document)?;
                            }
+
                            dump_content_file.flush()?;
                        }
                    }
--- a/crates/index-scheduler/src/index_mapper/index_map.rs
+++ b/crates/index-scheduler/src/index_mapper/index_map.rs
@@ -1,5 +1,7 @@
 use std::collections::BTreeMap;
+use std::env::VarError;
 use std::path::Path;
+use std::str::FromStr;
 use std::time::Duration;

 use meilisearch_types::heed::{EnvClosingEvent, EnvFlags, EnvOpenOptions};
@@ -302,7 +304,15 @@ fn create_or_open_index(
 ) -> Result<Index> {
    let mut options = EnvOpenOptions::new();
    options.map_size(clamp_to_page_size(map_size));
-    options.max_readers(1024);
+
+    let max_readers = match std::env::var("MEILI_EXPERIMENTAL_INDEX_MAX_READERS") {
+        Ok(value) => u32::from_str(&value).unwrap(),
+        Err(VarError::NotPresent) => 1024,
+        Err(VarError::NotUnicode(value)) => panic!(
+            "Invalid unicode for the `MEILI_EXPERIMENTAL_INDEX_MAX_READERS` env var: {value:?}"
+        ),
+    };
+    options.max_readers(max_readers);
    if enable_mdb_writemap {
        unsafe { options.flags(EnvFlags::WRITE_MAP) };
    }
--- a/crates/index-scheduler/src/lib.rs
+++ b/crates/index-scheduler/src/lib.rs
@@ -55,7 +55,6 @@ use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFea
 use meilisearch_types::heed::byteorder::BE;
 use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
 use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
-use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::index::IndexEmbeddingConfig;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
@@ -2017,14 +2016,21 @@ impl<'a> Dump<'a> {
        task: TaskDump,
        content_file: Option<Box<UpdateFile>>,
    ) -> Result<Task> {
+        let task_has_no_docs = matches!(task.kind, KindDump::DocumentImport { documents_count, .. } if documents_count == 0);
+
        let content_uuid = match content_file {
            Some(content_file) if task.status == Status::Enqueued => {
-                let (uuid, mut file) = self.index_scheduler.create_update_file(false)?;
-                let mut builder = DocumentsBatchBuilder::new(&mut file);
+                let (uuid, file) = self.index_scheduler.create_update_file(false)?;
+                let mut writer = io::BufWriter::new(file);
                for doc in content_file {
-                    builder.append_json_object(&doc?)?;
+                    let doc = doc?;
+                    serde_json::to_writer(&mut writer, &doc)
+                        .map_err(|e| {
+                            Error::from_milli(milli::InternalError::SerdeJson(e).into(), None)
+                        })
+                        .unwrap();
                }
-                builder.into_inner()?;
+                let file = writer.into_inner().map_err(|e| e.into_error())?;
                file.persist()?;

                Some(uuid)
@@ -2032,6 +2038,12 @@ impl<'a> Dump<'a> {
            // If the task isn't `Enqueued` then just generate a recognisable `Uuid`
            // in case we try to open it later.
            _ if task.status != Status::Enqueued => Some(Uuid::nil()),
+            None if task.status == Status::Enqueued && task_has_no_docs => {
+                let (uuid, file) = self.index_scheduler.create_update_file(false)?;
+                file.persist()?;
+
+                Some(uuid)
+            }
            _ => None,
        };

--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@@ -1337,7 +1337,7 @@ impl<'a> HitMaker<'a> {
                    ExplicitVectors { embeddings: Some(vector.into()), regenerate: !user_provided };
                vectors.insert(
                    name,
-                    serde_json::to_value(embeddings).map_err(InternalError::SerdeJson)?,
+                    serde_json::to_value(embeddings).map_err(InternalError::SerdeJson).unwrap(),
                );
            }
            document.insert("_vectors".into(), vectors.into());
@@ -1717,7 +1717,7 @@ fn make_document(

    // recreate the original json
    for (key, value) in obkv.iter() {
-        let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
+        let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
        let key = field_ids_map.name(key).expect("Missing field name").to_string();

        document.insert(key, value);
--- a/crates/meilisearch/tests/search/mod.rs
+++ b/crates/meilisearch/tests/search/mod.rs
@@ -1746,3 +1746,57 @@ async fn change_attributes_settings() {
        )
        .await;
 }
+
+/// Modifying facets with different casing should work correctly
+#[actix_rt::test]
+async fn change_facet_casing() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let (response, code) = index
+        .update_settings(json!({
+            "filterableAttributes": ["dog"],
+        }))
+        .await;
+    assert_eq!("202", code.as_str(), "{:?}", response);
+    index.wait_task(response.uid()).await;
+
+    let (response, _code) = index
+        .add_documents(
+            json!([
+                {
+                    "id": 1,
+                    "dog": "Bouvier Bernois"
+                }
+            ]),
+            None,
+        )
+        .await;
+    index.wait_task(response.uid()).await;
+
+    let (response, _code) = index
+        .add_documents(
+            json!([
+                {
+                    "id": 1,
+                    "dog": "bouvier bernois"
+                }
+            ]),
+            None,
+        )
+        .await;
+    index.wait_task(response.uid()).await;
+
+    index
+        .search(json!({ "facets": ["dog"] }), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["facetDistribution"]), @r###"
+            {
+              "dog": {
+                "bouvier bernois": 1
+              }
+            }
+            "###);
+        })
+        .await;
+}
--- a/crates/meilitool/src/main.rs
+++ b/crates/meilitool/src/main.rs
@@ -88,7 +88,7 @@ fn main() -> anyhow::Result<()> {
    match command {
        Command::ClearTaskQueue => clear_task_queue(db_path),
        Command::ExportADump { dump_dir, skip_enqueued_tasks } => {
-            export_a_dump(db_path, dump_dir, skip_enqueued_tasks)
+            export_a_dump(db_path, dump_dir, skip_enqueued_tasks, detected_version)
        }
        Command::OfflineUpgrade { target_version } => {
            let target_version = parse_version(&target_version).context("While parsing `--target-version`. Make sure `--target-version` is in the format MAJOR.MINOR.PATCH")?;
@@ -187,6 +187,7 @@ fn export_a_dump(
    db_path: PathBuf,
    dump_dir: PathBuf,
    skip_enqueued_tasks: bool,
+    detected_version: (String, String, String),
 ) -> Result<(), anyhow::Error> {
    let started_at = OffsetDateTime::now_utc();

@@ -238,9 +239,6 @@ fn export_a_dump(
    if skip_enqueued_tasks {
        eprintln!("Skip dumping the enqueued tasks...");
    } else {
-        eprintln!("Dumping the enqueued tasks...");
-
-        // 3. dump the tasks
        let mut dump_tasks = dump.create_tasks_queue()?;
        let mut count = 0;
        for ret in all_tasks.iter(&rtxn)? {
@@ -254,18 +252,39 @@ fn export_a_dump(
                if status == Status::Enqueued {
                    let content_file = file_store.get_update(content_file_uuid)?;

-                    let reader =
-                        DocumentsBatchReader::from_reader(content_file).with_context(|| {
-                            format!("While reading content file {:?}", content_file_uuid)
-                        })?;
-
-                    let (mut cursor, documents_batch_index) = reader.into_cursor_and_fields_index();
-                    while let Some(doc) = cursor.next_document().with_context(|| {
-                        format!("While iterating on content file {:?}", content_file_uuid)
-                    })? {
-                        dump_content_file
-                            .push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
+                    if (
+                        detected_version.0.as_str(),
+                        detected_version.1.as_str(),
+                        detected_version.2.as_str(),
+                    ) < ("1", "12", "0")
+                    {
+                        eprintln!("Dumping the enqueued tasks reading them in obkv format...");
+                        let reader =
+                            DocumentsBatchReader::from_reader(content_file).with_context(|| {
+                                format!("While reading content file {:?}", content_file_uuid)
+                            })?;
+                        let (mut cursor, documents_batch_index) =
+                            reader.into_cursor_and_fields_index();
+                        while let Some(doc) = cursor.next_document().with_context(|| {
+                            format!("While iterating on content file {:?}", content_file_uuid)
+                        })? {
+                            dump_content_file
+                                .push_document(&obkv_to_object(doc, &documents_batch_index)?)?;
+                        }
+                    } else {
+                        eprintln!(
+                            "Dumping the enqueued tasks reading them in JSON stream format..."
+                        );
+                        for document in
+                            serde_json::de::Deserializer::from_reader(content_file).into_iter()
+                        {
+                            let document = document.with_context(|| {
+                                format!("While reading content file {:?}", content_file_uuid)
+                            })?;
+                            dump_content_file.push_document(&document)?;
+                        }
                    }
+
                    dump_content_file.flush()?;
                    count += 1;
                }
--- a/crates/meilitool/src/upgrade/mod.rs
+++ b/crates/meilitool/src/upgrade/mod.rs
@@ -20,6 +20,34 @@ pub struct OfflineUpgrade {

 impl OfflineUpgrade {
    pub fn upgrade(self) -> anyhow::Result<()> {
+        // Adding a version?
+        //
+        // 1. Update the LAST_SUPPORTED_UPGRADE_FROM_VERSION and LAST_SUPPORTED_UPGRADE_TO_VERSION.
+        // 2. Add new version to the upgrade list if necessary
+        // 3. Use `no_upgrade` as index for versions that are compatible.
+
+        if self.current_version == self.target_version {
+            println!("Database is already at the target version. Exiting.");
+            return Ok(());
+        }
+
+        if self.current_version > self.target_version {
+            bail!(
+                "Cannot downgrade from {}.{}.{} to {}.{}.{}. Downgrade not supported",
+                self.current_version.0,
+                self.current_version.1,
+                self.current_version.2,
+                self.target_version.0,
+                self.target_version.1,
+                self.target_version.2
+            );
+        }
+
+        const FIRST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.9.0";
+        const LAST_SUPPORTED_UPGRADE_FROM_VERSION: &str = "1.12.7";
+        const FIRST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.10.0";
+        const LAST_SUPPORTED_UPGRADE_TO_VERSION: &str = "1.12.7";
+
        let upgrade_list = [
            (
                v1_9_to_v1_10 as fn(&Path, &str, &str, &str) -> Result<(), anyhow::Error>,
@@ -32,6 +60,8 @@ impl OfflineUpgrade {
            (v1_12_to_v1_12_3, "1", "12", "3"),
        ];

+        let no_upgrade: usize = upgrade_list.len();
+
        let (current_major, current_minor, current_patch) = &self.current_version;

        let start_at = match (
@@ -42,9 +72,12 @@ impl OfflineUpgrade {
            ("1", "9", _) => 0,
            ("1", "10", _) => 1,
            ("1", "11", _) => 2,
-            ("1", "12", x) if x == "0" || x == "1" || x == "2" => 3,
+            ("1", "12", "0" | "1" | "2") => 3,
+            ("1", "12", "3" | "4" | "5" | "6" | "7") => no_upgrade,
            _ => {
-                bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10")
+                bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from versions in range [{}-{}]",
+                      FIRST_SUPPORTED_UPGRADE_FROM_VERSION,
+                      LAST_SUPPORTED_UPGRADE_FROM_VERSION);
            }
        };

@@ -53,18 +86,28 @@ impl OfflineUpgrade {
        let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) {
            ("1", "10", _) => 0,
            ("1", "11", _) => 1,
-            ("1", "12", x) if x == "0" || x == "1" || x == "2" => 2,
-            ("1", "12", "3") => 3,
+            ("1", "12", "0" | "1" | "2") => 2,
+            ("1", "12", "3" | "4" | "5" | "6" | "7") => 3,
            (major, _, _) if major.starts_with('v') => {
                bail!("Target version must not starts with a `v`. Instead of writing `v1.9.0` write `1.9.0` for example.")
            }
            _ => {
-                bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11")
+                bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to versions in range [{}-{}]",
+                      FIRST_SUPPORTED_UPGRADE_TO_VERSION,
+                      LAST_SUPPORTED_UPGRADE_TO_VERSION);
            }
        };

        println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}");

+        if start_at == no_upgrade {
+            println!("No upgrade operation to perform, writing VERSION file");
+            create_version_file(&self.db_path, target_major, target_minor, target_patch)
+                .context("while writing VERSION file after the upgrade")?;
+            println!("Success");
+            return Ok(());
+        }
+
        #[allow(clippy::needless_range_loop)]
        for index in start_at..=ends_at {
            let (func, major, minor, patch) = upgrade_list[index];
--- a/crates/milli/src/documents/mod.rs
+++ b/crates/milli/src/documents/mod.rs
@@ -33,7 +33,7 @@ pub fn obkv_to_object(obkv: &KvReader<FieldId>, index: &DocumentsBatchIndex) ->
            let field_name = index
                .name(field_id)
                .ok_or(FieldIdMapMissingEntry::FieldId { field_id, process: "obkv_to_object" })?;
-            let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
+            let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
            Ok((field_name.to_string(), value))
        })
        .collect()
@@ -84,7 +84,8 @@ impl DocumentsBatchIndex {
            let key =
                self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone();
            let value = serde_json::from_slice::<serde_json::Value>(v)
-                .map_err(crate::error::InternalError::SerdeJson)?;
+                .map_err(crate::error::InternalError::SerdeJson)
+                .unwrap();
            map.insert(key, value);
        }

--- a/crates/milli/src/documents/primary_key.rs
+++ b/crates/milli/src/documents/primary_key.rs
@@ -92,7 +92,8 @@ impl<'a> PrimaryKey<'a> {
            PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
                Some(document_id_bytes) => {
                    let document_id = serde_json::from_slice(document_id_bytes)
-                        .map_err(InternalError::SerdeJson)?;
+                        .map_err(InternalError::SerdeJson)
+                        .unwrap();
                    match validate_document_id_value(document_id) {
                        Ok(document_id) => Ok(Ok(document_id)),
                        Err(user_error) => {
@@ -108,7 +109,8 @@ impl<'a> PrimaryKey<'a> {
                    if let Some(field_id) = fields.id(first_level_name) {
                        if let Some(value_bytes) = document.get(field_id) {
                            let object = serde_json::from_slice(value_bytes)
-                                .map_err(InternalError::SerdeJson)?;
+                                .map_err(InternalError::SerdeJson)
+                                .unwrap();
                            fetch_matching_values(object, right, &mut matching_documents_ids);

                            if matching_documents_ids.len() >= 2 {
@@ -151,11 +153,12 @@ impl<'a> PrimaryKey<'a> {
                };

                let document_id: &RawValue =
-                    serde_json::from_slice(document_id).map_err(InternalError::SerdeJson)?;
+                    serde_json::from_slice(document_id).map_err(InternalError::SerdeJson).unwrap();

                let document_id = document_id
                    .deserialize_any(crate::update::new::indexer::de::DocumentIdVisitor(indexer))
-                    .map_err(InternalError::SerdeJson)?;
+                    .map_err(InternalError::SerdeJson)
+                    .unwrap();

                let external_document_id = match document_id {
                    Ok(document_id) => Ok(document_id),
@@ -173,7 +176,7 @@ impl<'a> PrimaryKey<'a> {

                    let Some(value) = document.get(fid) else { continue };
                    let value: &RawValue =
-                        serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
+                        serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
                    match match_component(first_level, right, value, indexer, &mut docid) {
                        ControlFlow::Continue(()) => continue,
                        ControlFlow::Break(Ok(_)) => {
@@ -183,7 +186,7 @@ impl<'a> PrimaryKey<'a> {
                            .into())
                        }
                        ControlFlow::Break(Err(err)) => {
-                            return Err(InternalError::SerdeJson(err).into())
+                            panic!("{err}");
                        }
                    }
                }
--- a/crates/milli/src/lib.rs
+++ b/crates/milli/src/lib.rs
@@ -228,7 +228,8 @@ pub fn obkv_to_json(
                field_id: id,
                process: "obkv_to_json",
            })?;
-            let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
+            let value =
+                serde_json::from_slice(value).map_err(error::InternalError::SerdeJson).unwrap();
            Ok((name.to_owned(), value))
        })
        .collect()
--- a/crates/milli/src/search/facet/facet_distribution.rs
+++ b/crates/milli/src/search/facet/facet_distribution.rs
@@ -219,12 +219,19 @@ impl<'a> FacetDistribution<'a> {
                let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();

                let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
-                let original_string = self
-                    .index
-                    .field_id_docid_facet_strings
-                    .get(self.rtxn, &key)?
-                    .unwrap()
-                    .to_owned();
+                let optional_original_string =
+                    self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?;
+
+                let original_string = match optional_original_string {
+                    Some(original_string) => original_string.to_owned(),
+                    None => {
+                        tracing::error!(
+                            "Missing original facet string. Using the normalized facet {} instead",
+                            facet_key
+                        );
+                        facet_key.to_string()
+                    }
+                };

                distribution.insert(original_string, nbr_docids);
                if distribution.len() == self.max_values_per_facet {
--- a/crates/milli/src/thread_pool_no_abort.rs
+++ b/crates/milli/src/thread_pool_no_abort.rs
@@ -1,4 +1,4 @@
-use std::sync::atomic::{AtomicBool, Ordering};
+use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
 use std::sync::Arc;

 use rayon::{ThreadPool, ThreadPoolBuilder};
@@ -9,6 +9,8 @@ use thiserror::Error;
 #[derive(Debug)]
 pub struct ThreadPoolNoAbort {
    thread_pool: ThreadPool,
+    /// The number of active operations.
+    active_operations: AtomicUsize,
    /// Set to true if the thread pool catched a panic.
    pool_catched_panic: Arc<AtomicBool>,
 }
@@ -19,7 +21,9 @@ impl ThreadPoolNoAbort {
        OP: FnOnce() -> R + Send,
        R: Send,
    {
+        self.active_operations.fetch_add(1, Ordering::Relaxed);
        let output = self.thread_pool.install(op);
+        self.active_operations.fetch_sub(1, Ordering::Relaxed);
        // While reseting the pool panic catcher we return an error if we catched one.
        if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
            Err(PanicCatched)
@@ -31,6 +35,11 @@ impl ThreadPoolNoAbort {
    pub fn current_num_threads(&self) -> usize {
        self.thread_pool.current_num_threads()
    }
+
+    /// The number of active operations.
+    pub fn active_operations(&self) -> usize {
+        self.active_operations.load(Ordering::Relaxed)
+    }
 }

 #[derive(Error, Debug)]
@@ -64,6 +73,10 @@ impl ThreadPoolNoAbortBuilder {
            let catched_panic = pool_catched_panic.clone();
            move |_result| catched_panic.store(true, Ordering::SeqCst)
        });
-        Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
+        Ok(ThreadPoolNoAbort {
+            thread_pool: self.0.build()?,
+            active_operations: AtomicUsize::new(0),
+            pool_catched_panic,
+        })
    }
 }
--- a/crates/milli/src/update/index_documents/enrich.rs
+++ b/crates/milli/src/update/index_documents/enrich.rs
@@ -123,7 +123,8 @@ pub fn enrich_documents_batch<R: Read + Seek>(
            }
        }

-        let document_id = serde_json::to_vec(&document_id).map_err(InternalError::SerdeJson)?;
+        let document_id =
+            serde_json::to_vec(&document_id).map_err(InternalError::SerdeJson).unwrap();
        external_ids.insert(count.to_be_bytes(), document_id)?;

        count += 1;
@@ -237,7 +238,7 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
    let debug_id = || {
        serde_json::from_slice(id.value().as_bytes()).unwrap_or_else(|_| Value::from(id.debug()))
    };
-    match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson)? {
+    match serde_json::from_slice(bytes).map_err(InternalError::SerdeJson).unwrap() {
        Value::Object(mut object) => match (object.remove("lat"), object.remove("lng")) {
            (Some(lat), Some(lng)) => {
                match (extract_finite_float_from_value(lat), extract_finite_float_from_value(lng)) {
--- a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -206,7 +206,7 @@ fn tokens_from_document<'a>(
            if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
                // parse json.
                let value =
-                    serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
+                    serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson).unwrap();

                // prepare writing destination.
                buffers.obkv_positions_buffer.clear();
--- a/crates/milli/src/update/new/channel.rs
+++ b/crates/milli/src/update/new/channel.rs
@@ -27,6 +27,12 @@ use crate::update::new::KvReaderFieldId;
 use crate::vector::Embedding;
 use crate::{CboRoaringBitmapCodec, DocumentId, Error, Index, InternalError};

+/// Note that the FrameProducer requires up to 9 bytes to
+/// encode the length, the max grant has been computed accordingly.
+///
+/// <https://docs.rs/bbqueue/latest/bbqueue/framed/index.html#frame-header>
+const MAX_FRAME_HEADER_SIZE: usize = 9;
+
 /// Creates a tuple of senders/receiver to be used by
 /// the extractors and the writer loop.
 ///
@@ -53,8 +59,9 @@ pub fn extractor_writer_bbqueue(
    bbbuffers.resize_with(current_num_threads, || BBBuffer::new(bbbuffer_capacity));

    let capacity = bbbuffers.first().unwrap().capacity();
-    // Read the field description to understand this
-    let capacity = capacity.checked_sub(9).unwrap();
+    // 1. Due to fragmentation in the bbbuffer, we can only accept up to half the capacity in a single message.
+    // 2. Read the documentation for `MAX_FRAME_HEADER_SIZE` for more information about why it is here.
+    let max_grant = capacity.saturating_div(2).checked_sub(MAX_FRAME_HEADER_SIZE).unwrap();

    let producers = ThreadLocal::with_capacity(bbbuffers.len());
    let consumers = rayon::broadcast(|bi| {
@@ -65,7 +72,7 @@ pub fn extractor_writer_bbqueue(
    });

    let (sender, receiver) = flume::bounded(channel_capacity);
-    let sender = ExtractorBbqueueSender { sender, producers, capacity };
+    let sender = ExtractorBbqueueSender { sender, producers, max_grant };
    let receiver = WriterBbqueueReceiver {
        receiver,
        look_at_consumer: (0..consumers.len()).cycle(),
@@ -81,13 +88,10 @@ pub struct ExtractorBbqueueSender<'a> {
    /// A memory buffer, one by thread, is used to serialize
    /// the entries directly in this shared, lock-free space.
    producers: ThreadLocal<FullySend<RefCell<FrameProducer<'a>>>>,
-    /// The capacity of this frame producer, will never be able to store more than that.
-    ///
-    /// Note that the FrameProducer requires up to 9 bytes to encode the length,
-    /// the capacity has been shrunk accordingly.
-    ///
-    /// <https://docs.rs/bbqueue/latest/bbqueue/framed/index.html#frame-header>
-    capacity: usize,
+    /// The maximum frame grant that a producer can reserve.
+    /// It will never be able to store more than that as the
+    /// buffer cannot split data into two parts.
+    max_grant: usize,
 }

 pub struct WriterBbqueueReceiver<'a> {
@@ -443,14 +447,14 @@ impl<'b> ExtractorBbqueueSender<'b> {
    }

    fn delete_vector(&self, docid: DocumentId) -> crate::Result<()> {
-        let capacity = self.capacity;
+        let max_grant = self.max_grant;
        let refcell = self.producers.get().unwrap();
        let mut producer = refcell.0.borrow_mut_or_yield();

        let payload_header = EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid });
        let total_length = EntryHeader::total_delete_vector_size();
-        if total_length > capacity {
-            panic!("The entry is larger ({total_length} bytes) than the BBQueue capacity ({capacity} bytes)");
+        if total_length > max_grant {
+            panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
        }

        // Spin loop to have a frame the size we requested.
@@ -468,7 +472,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
        embedder_id: u8,
        embeddings: &[Vec<f32>],
    ) -> crate::Result<()> {
-        let capacity = self.capacity;
+        let max_grant = self.max_grant;
        let refcell = self.producers.get().unwrap();
        let mut producer = refcell.0.borrow_mut_or_yield();

@@ -479,7 +483,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
        let arroy_set_vector = ArroySetVectors { docid, embedder_id, _padding: [0; 3] };
        let payload_header = EntryHeader::ArroySetVectors(arroy_set_vector);
        let total_length = EntryHeader::total_set_vectors_size(embeddings.len(), dimensions);
-        if total_length > capacity {
+        if total_length > max_grant {
            let mut value_file = tempfile::tempfile().map(BufWriter::new)?;
            for embedding in embeddings {
                let mut embedding_bytes = bytemuck::cast_slice(embedding);
@@ -540,14 +544,14 @@ impl<'b> ExtractorBbqueueSender<'b> {
    where
        F: FnOnce(&mut [u8], &mut [u8]) -> crate::Result<()>,
    {
-        let capacity = self.capacity;
+        let max_grant = self.max_grant;
        let refcell = self.producers.get().unwrap();
        let mut producer = refcell.0.borrow_mut_or_yield();

        let operation = DbOperation { database, key_length: Some(key_length) };
        let payload_header = EntryHeader::DbOperation(operation);
        let total_length = EntryHeader::total_key_value_size(key_length, value_length);
-        if total_length > capacity {
+        if total_length > max_grant {
            let mut key_buffer = vec![0; key_length.get() as usize].into_boxed_slice();
            let value_file = tempfile::tempfile()?;
            value_file.set_len(value_length.try_into().unwrap())?;
@@ -601,7 +605,7 @@ impl<'b> ExtractorBbqueueSender<'b> {
    where
        F: FnOnce(&mut [u8]) -> crate::Result<()>,
    {
-        let capacity = self.capacity;
+        let max_grant = self.max_grant;
        let refcell = self.producers.get().unwrap();
        let mut producer = refcell.0.borrow_mut_or_yield();

@@ -610,8 +614,8 @@ impl<'b> ExtractorBbqueueSender<'b> {
        let operation = DbOperation { database, key_length: None };
        let payload_header = EntryHeader::DbOperation(operation);
        let total_length = EntryHeader::total_key_size(key_length);
-        if total_length > capacity {
-            panic!("The entry is larger ({total_length} bytes) than the BBQueue capacity ({capacity} bytes)");
+        if total_length > max_grant {
+            panic!("The entry is larger ({total_length} bytes) than the BBQueue max grant ({max_grant} bytes)");
        }

        // Spin loop to have a frame the size we requested.
--- a/crates/milli/src/update/new/document.rs
+++ b/crates/milli/src/update/new/document.rs
@@ -86,7 +86,7 @@ impl<'t, Mapper: FieldIdMapper> Document<'t> for DocumentFromDb<'t, Mapper> {

            let res = (|| {
                let value =
-                    serde_json::from_slice(value).map_err(crate::InternalError::SerdeJson)?;
+                    serde_json::from_slice(value).map_err(crate::InternalError::SerdeJson).unwrap();

                Ok((name, value))
            })();
@@ -139,7 +139,7 @@ impl<'t, Mapper: FieldIdMapper> DocumentFromDb<'t, Mapper> {
            return Ok(None);
        };
        let Some(value) = self.content.get(fid) else { return Ok(None) };
-        Ok(Some(serde_json::from_slice(value).map_err(InternalError::SerdeJson)?))
+        Ok(Some(serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap()))
    }
 }

--- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs
+++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs
@@ -283,42 +283,60 @@ impl FacetedDocidsExtractor {
 }

 struct DelAddFacetValue<'doc> {
-    strings: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
+    strings: HashMap<
+        (FieldId, &'doc str),
+        Option<BVec<'doc, u8>>,
+        hashbrown::DefaultHashBuilder,
+        &'doc Bump,
+    >,
    f64s: HashMap<(FieldId, BVec<'doc, u8>), DelAdd, hashbrown::DefaultHashBuilder, &'doc Bump>,
+    doc_alloc: &'doc Bump,
 }

 impl<'doc> DelAddFacetValue<'doc> {
    fn new(doc_alloc: &'doc Bump) -> Self {
-        Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc) }
+        Self { strings: HashMap::new_in(doc_alloc), f64s: HashMap::new_in(doc_alloc), doc_alloc }
    }

    fn insert_add(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
-        let cache = match kind {
-            FacetKind::String => &mut self.strings,
-            FacetKind::Number => &mut self.f64s,
-            _ => return,
-        };
-
-        let key = (fid, value);
-        if let Some(DelAdd::Deletion) = cache.get(&key) {
-            cache.remove(&key);
-        } else {
-            cache.insert(key, DelAdd::Addition);
+        match kind {
+            FacetKind::Number => {
+                let key = (fid, value);
+                if let Some(DelAdd::Deletion) = self.f64s.get(&key) {
+                    self.f64s.remove(&key);
+                } else {
+                    self.f64s.insert(key, DelAdd::Addition);
+                }
+            }
+            FacetKind::String => {
+                if let Ok(s) = std::str::from_utf8(&value) {
+                    let normalized = crate::normalize_facet(s);
+                    let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
+                    self.strings.insert((fid, truncated), Some(value));
+                }
+            }
+            _ => (),
        }
    }

    fn insert_del(&mut self, fid: FieldId, value: BVec<'doc, u8>, kind: FacetKind) {
-        let cache = match kind {
-            FacetKind::String => &mut self.strings,
-            FacetKind::Number => &mut self.f64s,
-            _ => return,
-        };
-
-        let key = (fid, value);
-        if let Some(DelAdd::Addition) = cache.get(&key) {
-            cache.remove(&key);
-        } else {
-            cache.insert(key, DelAdd::Deletion);
+        match kind {
+            FacetKind::Number => {
+                let key = (fid, value);
+                if let Some(DelAdd::Addition) = self.f64s.get(&key) {
+                    self.f64s.remove(&key);
+                } else {
+                    self.f64s.insert(key, DelAdd::Deletion);
+                }
+            }
+            FacetKind::String => {
+                if let Ok(s) = std::str::from_utf8(&value) {
+                    let normalized = crate::normalize_facet(s);
+                    let truncated = self.doc_alloc.alloc_str(truncate_str(&normalized));
+                    self.strings.insert((fid, truncated), None);
+                }
+            }
+            _ => (),
        }
    }

@@ -329,18 +347,14 @@ impl<'doc> DelAddFacetValue<'doc> {
        doc_alloc: &Bump,
    ) -> crate::Result<()> {
        let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
-        for ((fid, value), deladd) in self.strings {
-            if let Ok(s) = std::str::from_utf8(&value) {
-                buffer.clear();
-                buffer.extend_from_slice(&fid.to_be_bytes());
-                buffer.extend_from_slice(&docid.to_be_bytes());
-                let normalized = crate::normalize_facet(s);
-                let truncated = truncate_str(&normalized);
-                buffer.extend_from_slice(truncated.as_bytes());
-                match deladd {
-                    DelAdd::Deletion => sender.delete_facet_string(&buffer)?,
-                    DelAdd::Addition => sender.write_facet_string(&buffer, &value)?,
-                }
+        for ((fid, truncated), value) in self.strings {
+            buffer.clear();
+            buffer.extend_from_slice(&fid.to_be_bytes());
+            buffer.extend_from_slice(&docid.to_be_bytes());
+            buffer.extend_from_slice(truncated.as_bytes());
+            match &value {
+                Some(value) => sender.write_facet_string(&buffer, value)?,
+                None => sender.delete_facet_string(&buffer)?,
            }
        }

--- a/crates/milli/src/update/new/extract/faceted/facet_document.rs
+++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs
@@ -27,7 +27,7 @@ pub fn extract_document_facets<'doc>(
        let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
        if selection != perm_json_p::Selection::Skip {
            // parse json.
-            match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
+            match serde_json::value::to_value(value).map_err(InternalError::SerdeJson).unwrap() {
                Value::Object(object) => {
                    perm_json_p::seek_leaf_values_in_object(
                        &object,
--- a/crates/milli/src/update/new/extract/geo/mod.rs
+++ b/crates/milli/src/update/new/extract/geo/mod.rs
@@ -256,15 +256,16 @@ pub fn extract_geo_coordinates(
    external_id: &str,
    raw_value: &RawValue,
 ) -> Result<Option<[f64; 2]>> {
-    let mut geo = match serde_json::from_str(raw_value.get()).map_err(InternalError::SerdeJson)? {
-        Value::Null => return Ok(None),
-        Value::Object(map) => map,
-        value => {
-            return Err(
-                GeoError::NotAnObject { document_id: Value::from(external_id), value }.into()
-            )
-        }
-    };
+    let mut geo =
+        match serde_json::from_str(raw_value.get()).map_err(InternalError::SerdeJson).unwrap() {
+            Value::Null => return Ok(None),
+            Value::Object(map) => map,
+            value => {
+                return Err(
+                    GeoError::NotAnObject { document_id: Value::from(external_id), value }.into()
+                )
+            }
+        };

    let [lat, lng] = match (geo.remove("lat"), geo.remove("lng")) {
        (Some(lat), Some(lng)) => {
--- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs
+++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs
@@ -94,7 +94,7 @@ impl<'a> DocumentTokenizer<'a> {
            };

            // parse json.
-            match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
+            match serde_json::to_value(value).map_err(InternalError::SerdeJson).unwrap() {
                Value::Object(object) => seek_leaf_values_in_object(
                    &object,
                    None,
--- a/crates/milli/src/update/new/indexer/document_operation.rs
+++ b/crates/milli/src/update/new/indexer/document_operation.rs
@@ -158,7 +158,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(

    let mut previous_offset = 0;
    let mut iter = Deserializer::from_slice(payload).into_iter::<&RawValue>();
-    while let Some(doc) = iter.next().transpose().map_err(InternalError::SerdeJson)? {
+    while let Some(doc) = iter.next().transpose().map_err(InternalError::SerdeJson).unwrap() {
        *bytes = previous_offset as u64;

        // Only guess the primary key if it is the first document
--- a/crates/milli/src/update/new/indexer/mod.rs
+++ b/crates/milli/src/update/new/indexer/mod.rs
@@ -93,17 +93,25 @@ where
        ..grenad_parameters
    };

-    // We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
-    let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
+    // 5% of the allocated memory for the extractors, or at least 100 MiB per thread
+    // 5% of the allocated memory for the bbqueues, or at least 50 MiB per thread
+    //
+    // Minimum capacity for bbqueues
+    let minimum_total_bbbuffer_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB
+    let minimum_total_extractors_capacity = minimum_total_bbbuffer_capacity * 2;
+
    let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
-        (grenad_parameters, 2 * minimum_capacity), // 100 MiB by thread by default
+        (
+            GrenadParameters {
+                max_memory: Some(minimum_total_extractors_capacity),
+                ..grenad_parameters
+            },
+            minimum_total_bbbuffer_capacity,
+        ), // 100 MiB by thread by default
        |max_memory| {
-            // 2% of the indexing memory
-            let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
+            let total_bbbuffer_capacity = max_memory.max(minimum_total_bbbuffer_capacity);
            let new_grenad_parameters = GrenadParameters {
-                max_memory: Some(
-                    max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
-                ),
+                max_memory: Some(max_memory.max(minimum_total_extractors_capacity)),
                ..grenad_parameters
            };
            (new_grenad_parameters, total_bbbuffer_capacity)
--- a/crates/milli/src/update/new/indexer/partial_dump.rs
+++ b/crates/milli/src/update/new/indexer/partial_dump.rs
@@ -78,7 +78,8 @@ where
        let external_document_id = external_document_id.to_de();

        let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
-            .map_err(InternalError::SerdeJson)?;
+            .map_err(InternalError::SerdeJson)
+            .unwrap();

        let insertion = Insertion::create(docid, external_document_id, Versions::single(document));
        Ok(Some(DocumentChange::Insertion(insertion)))
--- a/crates/milli/src/update/new/indexer/update_by_function.rs
+++ b/crates/milli/src/update/new/indexer/update_by_function.rs
@@ -58,9 +58,9 @@ impl UpdateByFunction {

        let ast = engine.compile(code).map_err(UserError::DocumentEditionCompilationError)?;
        let context = match context {
-            Some(context) => {
-                Some(serde_json::from_value(context.into()).map_err(InternalError::SerdeJson)?)
-            }
+            Some(context) => Some(
+                serde_json::from_value(context.into()).map_err(InternalError::SerdeJson).unwrap(),
+            ),
            None => None,
        };

@@ -137,9 +137,11 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
                Some(new_rhai_document) => {
                    let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
                    serde_json::to_writer(&mut buffer, &new_rhai_document)
-                        .map_err(InternalError::SerdeJson)?;
+                        .map_err(InternalError::SerdeJson)
+                        .unwrap();
                    let raw_new_doc = serde_json::from_slice(buffer.into_bump_slice())
-                        .map_err(InternalError::SerdeJson)?;
+                        .map_err(InternalError::SerdeJson)
+                        .unwrap();

                    // Note: This condition is not perfect. Sometimes it detect changes
                    //       like with floating points numbers and consider updating
@@ -166,7 +168,8 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
                                FxBuildHasher,
                                doc_alloc,
                            )
-                            .map_err(InternalError::SerdeJson)?;
+                            .map_err(InternalError::SerdeJson)
+                            .unwrap();

                            Ok(Some(DocumentChange::Update(Update::create(
                                docid,
@@ -200,7 +203,7 @@ fn obkv_to_rhaimap(obkv: &KvReaderFieldId, fields_ids_map: &FieldsIdsMap) -> Res
                field_id: id,
                process: "all_obkv_to_rhaimap",
            })?;
-            let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
+            let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson).unwrap();
            Ok((name.into(), value))
        })
        .collect();
--- a/crates/milli/src/update/new/vector_document.rs
+++ b/crates/milli/src/update/new/vector_document.rs
@@ -105,7 +105,8 @@ impl<'t> VectorDocumentFromDb<'t> {
        let vectors_field = match vectors {
            Some(vectors) => Some(
                RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
-                    .map_err(InternalError::SerdeJson)?,
+                    .map_err(InternalError::SerdeJson)
+                    .unwrap(),
            ),
            None => None,
        };
--- a/crates/milli/src/vector/ollama.rs
+++ b/crates/milli/src/vector/ollama.rs
@@ -5,7 +5,7 @@ use rayon::slice::ParallelSlice as _;

 use super::error::{EmbedError, EmbedErrorKind, NewEmbedderError, NewEmbedderErrorKind};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
-use super::DistributionShift;
+use super::{DistributionShift, REQUEST_PARALLELISM};
 use crate::error::FaultSource;
 use crate::vector::Embedding;
 use crate::ThreadPoolNoAbort;
@@ -98,14 +98,18 @@ impl Embedder {
        text_chunks: Vec<Vec<String>>,
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
-        threads
-            .install(move || {
-                text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+        } else {
+            threads
+                .install(move || {
+                    text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
    }

    pub(crate) fn embed_chunks_ref(
@@ -113,20 +117,30 @@ impl Embedder {
        texts: &[&str],
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<f32>>, EmbedError> {
-        threads
-            .install(move || {
-                let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
-                    .par_chunks(self.prompt_count_in_chunk_hint())
-                    .map(move |chunk| self.embed(chunk, None))
-                    .collect();
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                .chunks(self.prompt_count_in_chunk_hint())
+                .map(move |chunk| self.embed(chunk, None))
+                .collect();

-                let embeddings = embeddings?;
-                Ok(embeddings.into_iter().flatten().collect())
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+            let embeddings = embeddings?;
+            Ok(embeddings.into_iter().flatten().collect())
+        } else {
+            threads
+                .install(move || {
+                    let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                        .par_chunks(self.prompt_count_in_chunk_hint())
+                        .map(move |chunk| self.embed(chunk, None))
+                        .collect();
+
+                    let embeddings = embeddings?;
+                    Ok(embeddings.into_iter().flatten().collect())
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
    }

    pub fn chunk_count_hint(&self) -> usize {
--- a/crates/milli/src/vector/openai.rs
+++ b/crates/milli/src/vector/openai.rs
@@ -6,7 +6,7 @@ use rayon::slice::ParallelSlice as _;

 use super::error::{EmbedError, NewEmbedderError};
 use super::rest::{Embedder as RestEmbedder, EmbedderOptions as RestEmbedderOptions};
-use super::DistributionShift;
+use super::{DistributionShift, REQUEST_PARALLELISM};
 use crate::error::FaultSource;
 use crate::vector::error::EmbedErrorKind;
 use crate::vector::Embedding;
@@ -255,14 +255,18 @@ impl Embedder {
        text_chunks: Vec<Vec<String>>,
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
-        threads
-            .install(move || {
-                text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            text_chunks.into_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+        } else {
+            threads
+                .install(move || {
+                    text_chunks.into_par_iter().map(move |chunk| self.embed(&chunk, None)).collect()
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
    }

    pub(crate) fn embed_chunks_ref(
@@ -270,20 +274,29 @@ impl Embedder {
        texts: &[&str],
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<f32>>, EmbedError> {
-        threads
-            .install(move || {
-                let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
-                    .par_chunks(self.prompt_count_in_chunk_hint())
-                    .map(move |chunk| self.embed(chunk, None))
-                    .collect();
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                .chunks(self.prompt_count_in_chunk_hint())
+                .map(move |chunk| self.embed(chunk, None))
+                .collect();
+            let embeddings = embeddings?;
+            Ok(embeddings.into_iter().flatten().collect())
+        } else {
+            threads
+                .install(move || {
+                    let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                        .par_chunks(self.prompt_count_in_chunk_hint())
+                        .map(move |chunk| self.embed(chunk, None))
+                        .collect();

-                let embeddings = embeddings?;
-                Ok(embeddings.into_iter().flatten().collect())
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+                    let embeddings = embeddings?;
+                    Ok(embeddings.into_iter().flatten().collect())
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
    }

    pub fn chunk_count_hint(&self) -> usize {
--- a/crates/milli/src/vector/rest.rs
+++ b/crates/milli/src/vector/rest.rs
@@ -188,14 +188,18 @@ impl Embedder {
        text_chunks: Vec<Vec<String>>,
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
-        threads
-            .install(move || {
-                text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            text_chunks.into_iter().map(move |chunk| self.embed(chunk, None)).collect()
+        } else {
+            threads
+                .install(move || {
+                    text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
    }

    pub(crate) fn embed_chunks_ref(
@@ -203,20 +207,30 @@ impl Embedder {
        texts: &[&str],
        threads: &ThreadPoolNoAbort,
    ) -> Result<Vec<Embedding>, EmbedError> {
-        threads
-            .install(move || {
-                let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
-                    .par_chunks(self.prompt_count_in_chunk_hint())
-                    .map(move |chunk| self.embed_ref(chunk, None))
-                    .collect();
+        if threads.active_operations() >= REQUEST_PARALLELISM {
+            let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                .chunks(self.prompt_count_in_chunk_hint())
+                .map(move |chunk| self.embed_ref(chunk, None))
+                .collect();

-                let embeddings = embeddings?;
-                Ok(embeddings.into_iter().flatten().collect())
-            })
-            .map_err(|error| EmbedError {
-                kind: EmbedErrorKind::PanicInThreadPool(error),
-                fault: FaultSource::Bug,
-            })?
+            let embeddings = embeddings?;
+            Ok(embeddings.into_iter().flatten().collect())
+        } else {
+            threads
+                .install(move || {
+                    let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
+                        .par_chunks(self.prompt_count_in_chunk_hint())
+                        .map(move |chunk| self.embed_ref(chunk, None))
+                        .collect();
+
+                    let embeddings = embeddings?;
+                    Ok(embeddings.into_iter().flatten().collect())
+                })
+                .map_err(|error| EmbedError {
+                    kind: EmbedErrorKind::PanicInThreadPool(error),
+                    fault: FaultSource::Bug,
+                })?
+        }
    }

    pub fn chunk_count_hint(&self) -> usize {
Author	SHA1	Message	Date
Kerollmops	226bcb2717	Do not create too many rayon tasks when processing the settings	2025-01-29 17:02:06 +01:00
Louis Dureuil	cd58a71f57	panic on serde json	2025-01-29 10:13:02 +01:00
Kerollmops	e0f446e4d3	Remove a log that would log too much	2025-01-28 21:31:01 +01:00
Kerollmops	3bbad823e0	Refine the env variable and the max readers	2025-01-28 21:31:01 +01:00
Kerollmops	b605549bf2	Do not create too many rayon tasks	2025-01-28 21:31:01 +01:00
Kerollmops	6a1062edf5	Add more logs to see calls to the embedders	2025-01-28 21:31:01 +01:00
Kerollmops	426ea5aa97	Accept the max readers param by env var and increase it	2025-01-28 21:31:00 +01:00
meili-bors[bot]	e20b91210d	Merge #5276 5276: Fix the stuck indexation due to the internal BBQueue capacity r=curquiza a=Kerollmops Fixes https://github.com/meilisearch/meilisearch/issues/5277. Reduce the maximum reserve grant in the BBQueue so we are never stuck. Co-authored-by: Kerollmops <clement@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Clément Renault <clement@meilisearch.com>	2025-01-23 13:41:34 +00:00
meili-bors[bot]	17478301ab	Merge #5278 Some checks failed Test suite / Tests almost all features (push) Has been skipped Test suite / Test disabled tokenization (push) Has been skipped Test suite / Tests on ubuntu-20.04 (push) Failing after 14s Test suite / Run tests in debug (push) Failing after 13s Test suite / Run Clippy (push) Failing after 14s Test suite / Tests on windows-2022 (push) Failing after 26s Test suite / Run Rustfmt (push) Successful in 1m43s Test suite / Tests on macos-13 (push) Has been cancelled 5278: Update version for the next release (v1.12.7) in Cargo.toml r=dureuill a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: dureuill <dureuill@users.noreply.github.com>	2025-01-23 10:47:30 +00:00
dureuill	968c9dff27	Update version for the next release (v1.12.7) in Cargo.toml	2025-01-23 10:17:23 +00:00
Louis Dureuil	463553988c	Support offline upgrade up to v1.12.7	2025-01-23 11:11:40 +01:00
Clément Renault	c321fdb9c0	Comment the max grant of the bbqueue Co-authored-by: Louis Dureuil <louis@meilisearch.com>	2025-01-23 11:09:20 +01:00
Louis Dureuil	36b6e94b29	Give more RAM to bbqueue. - bbqueue buffers used to have (5% * 2%) / num_threads - they now have 5% / num_threads	2025-01-23 10:55:03 +01:00
Kerollmops	34dea863e5	Reduce the maximum grant possible we can store in the BBQueue	2025-01-23 10:43:28 +01:00
meili-bors[bot]	ad9d8e10f2	Merge #5260 Some checks failed Test suite / Tests on ubuntu-20.04 (push) Failing after 2s Test suite / Tests almost all features (push) Has been skipped Test suite / Test disabled tokenization (push) Has been skipped Test suite / Run tests in debug (push) Failing after 1s Test suite / Tests on windows-2022 (push) Failing after 26s Test suite / Run Rustfmt (push) Successful in 1m57s Test suite / Run Clippy (push) Successful in 6m6s Test suite / Tests on macos-13 (push) Has been cancelled 5260: Update version for the next release (v1.12.6) in Cargo.toml r=Kerollmops a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: Kerollmops <Kerollmops@users.noreply.github.com>	2025-01-21 12:37:46 +00:00
Kerollmops	f7f35ef37c	Update version for the next release (v1.12.6) in Cargo.toml	2025-01-21 12:22:56 +00:00
meili-bors[bot]	c575d2693b	Merge #5258 5258: Unify facet strings by their normalized value r=ManyTheFish a=dureuill Fixes #5228: the "missing facet keys" issue. - Before this PR, updating a document such that `"facet": "DUREUILL"` would become `"facet": "dureuill"` could cause the normalized facet value `dureuill` to be removed from `field_id_docid_facet_strings` db. - This PR makes sure to unify the intermediate representation of the facet strings by their field_id and normalized (and truncated) string value. - The introduced test is testing only one of the two facet distribution algorithms. - We removed the panic when the facet string was not found, and we instead returned the normalized string. ## Draft status - [x] target release v1.12.6 branch and milestone - [ ] ~consider meilitool offline upgrade to fix the corrupted dbs in the wild.~ workaround: ~remove facets, then add them again... if your facet distribution is right.~ Just use a dump. - [x] Add unit test demonstrating the issue fixed by this PR. Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: Kerollmops <clement@meilisearch.com>	2025-01-21 11:02:33 +00:00
Kerollmops	024e06f7e3	Do not panic when the facet string is not found	2025-01-21 12:01:26 +01:00
Kerollmops	145fa3a8ff	Add a test to check the facet casing is good	2025-01-21 11:42:25 +01:00
Louis Dureuil	d3a7e10348	Unify facet strings by their normalized value	2025-01-21 00:11:50 +01:00
meili-bors[bot]	1c78447226	Merge #5246 Some checks failed Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Waiting to run Test suite / Tests on ubuntu-20.04 (push) Failing after 1s Test suite / Tests almost all features (push) Has been skipped Test suite / Test disabled tokenization (push) Has been skipped Test suite / Run tests in debug (push) Failing after 17s Test suite / Run Rustfmt (push) Failing after 16s Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 40s Test suite / Run Clippy (push) Failing after 1m24s 5246: Fix dump import r=Kerollmops a=dureuill - Fix: handle the change of format of the update files - Correctly handle update files as JSON stream rather than obkv when exporting a dump with enqueued tasks - Correctly recreate update files as JSON stream rather than obkv when importing a dump - As the dump format itself didn't change, all dumps are still compatible - Temporary workaround for https://github.com/meilisearch/meilisearch/issues/5247: set the batch uid of tasks to `null` at dump export time. - Changes to meilitool - Export dump with update files in new format if DB >= v1.12 - offline upgrade now supports upgrading from [1.9.0-1.12.5] to [1.10.0-1.12.5]. - offline upgrade supports no-op upgrades and has better error messages Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: ManyTheFish <many@meilisearch.com>	2025-01-20 13:03:49 +00:00
Louis Dureuil	c55891f73b	Replace guards by OR patterns Co-authored-by: Tamo <tamo@meilisearch.com>	2025-01-20 11:46:03 +01:00
Louis Dureuil	40f8c0d840	Remove batch ids on export	2025-01-20 11:16:18 +01:00
Louis Dureuil	34d8c1a903	Make offline upgrade more flexible	2025-01-20 10:43:47 +01:00
Louis Dureuil	3c9483b6e0	meilitool dumps old-style dump for older DBs, otherwise new-style	2025-01-20 10:43:47 +01:00
meili-bors[bot]	8c789b3c7a	Merge #5252 5252: Update version for the next release (v1.12.5) in Cargo.toml r=dureuill a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: dureuill <dureuill@users.noreply.github.com>	2025-01-20 09:03:35 +00:00
dureuill	3403eae9ee	Update version for the next release (v1.12.5) in Cargo.toml	2025-01-20 08:53:20 +00:00
Louis Dureuil	11458eefd9	Handle empty payloads	2025-01-20 09:51:07 +01:00
Louis Dureuil	289eb92bef	Fix warnings	2025-01-20 09:51:07 +01:00
ManyTheFish	cea0c89212	Change format of update file when importing dump	2025-01-20 09:51:07 +01:00
Louis Dureuil	1cadab9ad8	Also fix dump import from meilitool	2025-01-20 09:51:07 +01:00
Louis Dureuil	6383f8f19e	Do not explode on missing content file if the task has no docs	2025-01-20 09:51:06 +01:00
Louis Dureuil	8a9f952bda	Create update files in new format	2025-01-20 09:51:06 +01:00