Merge branch 'main' into release-v1.14.0-tmp

Author: Tamo
Date: 2025-04-14 12:35:47 +02:00
Committed by: GitHub

113 changed files with 1268 additions and 852 deletions

@@ -115,7 +115,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
         if let Some(geo_value) = geo_field_id.and_then(|fid| document.get(fid)) {
             if let Err(user_error) = validate_geo_from_json(&document_id, geo_value)? {
-                return Ok(Err(UserError::from(user_error)));
+                return Ok(Err(UserError::from(Box::new(user_error))));
             }
         }

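Boxing the error changes how big the Err side of the Result is: a minimal sketch of the effect, using a hypothetical BigError in place of the real geo error type.

    // Hypothetical stand-in; only the size relationship is the point.
    struct BigError {
        document_id: String,
        context: [u8; 128],
    }

    fn main() {
        // The unboxed Result must reserve space for the whole error inline,
        // while the boxed one only stores a pointer on the Err side.
        assert!(
            std::mem::size_of::<Result<u32, Box<BigError>>>()
                < std::mem::size_of::<Result<u32, BigError>>()
        );
    }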

@@ -160,11 +160,11 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
             let del_geo_support = settings_diff
                 .old
                 .geo_fields_ids
-                .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                .is_some_and(|(lat, lng)| field_id == lat || field_id == lng);
             let add_geo_support = settings_diff
                 .new
                 .geo_fields_ids
-                .map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                .is_some_and(|(lat, lng)| field_id == lat || field_id == lng);
             let del_filterable_values =
                 del_value.map(|value| extract_facet_values(&value, del_geo_support));
             let add_filterable_values =

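Behaviorally, opt.map_or(false, f) and opt.is_some_and(f) are identical; the rewrite just names the intent (is_some_and has been stable since Rust 1.70, and clippy suggests it via the unnecessary_map_or lint). A quick equivalence check in the shape used above:

    fn main() {
        let geo_fields_ids: Option<(u16, u16)> = Some((1, 2));
        let field_id: u16 = 2;
        // Both return false for None and apply the predicate otherwise.
        assert_eq!(
            geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
            geo_fields_ids.is_some_and(|(lat, lng)| field_id == lat || field_id == lng),
        );
    }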

@@ -80,22 +80,28 @@ fn extract_lat_lng(
             let (lat, lng) = match (lat, lng) {
                 (Some(lat), Some(lng)) => (lat, lng),
                 (Some(_), None) => {
-                    return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+                    return Err(
+                        Box::new(GeoError::MissingLatitude { document_id: document_id() }).into()
+                    )
                 }
                 (None, Some(_)) => {
-                    return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+                    return Err(
+                        Box::new(GeoError::MissingLongitude { document_id: document_id() }).into()
+                    )
                 }
                 (None, None) => return Ok(None),
             };
             let lat = extract_finite_float_from_value(
                 serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
             )
-            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })
+            .map_err(Box::new)?;
             let lng = extract_finite_float_from_value(
                 serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
             )
-            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
+            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })
+            .map_err(Box::new)?;
             Ok(Some([lat, lng]))
         }
         None => Ok(None),

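The Box::new(...).into() and .map_err(Box::new)? conversions above only compile if the surrounding error enum has a From impl for the boxed type. A minimal sketch of that wiring, with hypothetical GeoError and Error enums mirroring the shape of this hunk:

    #[derive(Debug)]
    enum GeoError {
        MissingLatitude { document_id: String },
    }

    #[derive(Debug)]
    enum Error {
        // Storing the Box keeps this variant pointer-sized inside Error.
        Geo(Box<GeoError>),
    }

    impl From<Box<GeoError>> for Error {
        fn from(e: Box<GeoError>) -> Self {
            Error::Geo(e)
        }
    }

    fn require_lat(document_id: String, lat: Option<f64>) -> Result<f64, Error> {
        // Same shape as the diff: build the error, box it, convert via From.
        lat.ok_or_else(|| Box::new(GeoError::MissingLatitude { document_id }).into())
    }

    fn main() {
        assert!(require_lat("doc-1".into(), Some(48.85)).is_ok());
        assert!(require_lat("doc-1".into(), None).is_err());
    }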

@@ -69,7 +69,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         let document_id = u32::from_be_bytes(document_id_bytes);
         // if we change document, we fill the sorter
-        if current_document_id.map_or(false, |id| id != document_id) {
+        if current_document_id.is_some_and(|id| id != document_id) {
             // FIXME: span inside of a hot loop might degrade performance and create big reports
             let span = tracing::trace_span!(target: "indexing::details", "document_into_sorter");
             let _entered = span.enter();
@@ -96,7 +96,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         if let Some(deletion) = KvReaderDelAdd::from_slice(value).get(DelAdd::Deletion) {
             for (position, word) in KvReaderU16::from_slice(deletion).iter() {
                 // drain the proximity window until the head word is considered close to the word we are inserting.
-                while del_word_positions.front().map_or(false, |(_w, p)| {
+                while del_word_positions.front().is_some_and(|(_w, p)| {
                     index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
                 }) {
                     word_positions_into_word_pair_proximity(
@@ -129,7 +129,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
         if let Some(addition) = KvReaderDelAdd::from_slice(value).get(DelAdd::Addition) {
             for (position, word) in KvReaderU16::from_slice(addition).iter() {
                 // drain the proximity window until the head word is considered close to the word we are inserting.
-                while add_word_positions.front().map_or(false, |(_w, p)| {
+                while add_word_positions.front().is_some_and(|(_w, p)| {
                     index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
                 }) {
                     word_positions_into_word_pair_proximity(

@@ -46,7 +46,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
             .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
         let document_id = DocumentId::from_be_bytes(document_id_bytes);
-        if current_document_id.map_or(false, |id| document_id != id) {
+        if current_document_id.is_some_and(|id| document_id != id) {
             words_position_into_sorter(
                 current_document_id.unwrap(),
                 &mut key_buffer,

@@ -281,7 +281,7 @@ fn send_original_documents_data(
         };
         if !(remove_vectors.is_empty()
             && manual_vectors.is_empty()
-            && embeddings.as_ref().map_or(true, |e| e.is_empty()))
+            && embeddings.as_ref().is_none_or(|e| e.is_empty()))
         {
             let _ = lmdb_writer_sx.send(Ok(TypedChunk::VectorPoints {
                 remove_vectors,

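map_or(true, ...) is the dual case: the condition is vacuously true when there are no embeddings at all, and is_none_or (stable since Rust 1.82) says exactly that. An equivalence check in miniature:

    fn main() {
        for embeddings in [None, Some(vec![]), Some(vec![1.0f32])] {
            // Both are true when the Option is None or the inner Vec is empty.
            assert_eq!(
                embeddings.as_ref().map_or(true, |e| e.is_empty()),
                embeddings.as_ref().is_none_or(|e| e.is_empty()),
            );
        }
    }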

@@ -514,12 +514,9 @@ where
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
             let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
-            let was_quantized = settings_diff
-                .old
-                .embedding_configs
-                .get(&embedder_name)
-                .map_or(false, |conf| conf.2);
-            let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
+            let was_quantized =
+                settings_diff.old.embedding_configs.get(&embedder_name).is_some_and(|conf| conf.2);
+            let is_quantizing = embedder_config.is_some_and(|action| action.is_being_quantized);
             pool.install(|| {
                 let mut writer = ArroyWrapper::new(vector_arroy, embedder_index, was_quantized);

@@ -197,7 +197,7 @@ impl<'a, 'i> Transform<'a, 'i> {
             // drop_and_reuse is called instead of .clear() to communicate to the compiler that field_buffer
             // does not keep references from the cursor between loop iterations
             let mut field_buffer_cache = drop_and_reuse(field_buffer);
-            if self.indexer_settings.log_every_n.map_or(false, |len| documents_count % len == 0) {
+            if self.indexer_settings.log_every_n.is_some_and(|len| documents_count % len == 0) {
                 progress_callback(UpdateIndexingStep::RemapDocumentAddition {
                     documents_seen: documents_count,
                 });

@@ -55,7 +55,7 @@ impl ChunkAccumulator {
         match self
             .inner
             .iter()
-            .position(|right| right.first().map_or(false, |right| chunk.mergeable_with(right)))
+            .position(|right| right.first().is_some_and(|right| chunk.mergeable_with(right)))
         {
             Some(position) => {
                 let v = self.inner.get_mut(position).unwrap();
@@ -664,11 +664,8 @@ pub(crate) fn write_typed_chunk_into_index(
             let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
                 InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
             )?;
-            let binary_quantized = settings_diff
-                .old
-                .embedding_configs
-                .get(&embedder_name)
-                .map_or(false, |conf| conf.2);
+            let binary_quantized =
+                settings_diff.old.embedding_configs.get(&embedder_name).is_some_and(|conf| conf.2);
             // FIXME: allow customizing distance
             let writer = ArroyWrapper::new(index.vector_arroy, embedder_index, binary_quantized);