From afc164a2715224931e1a4fbc2bb90a3fe1930836 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 17 Jul 2025 16:09:11 +0200 Subject: [PATCH] Fix in old indexer --- .../extract/extract_vector_points.rs | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index 064cfd154..a1dfa1aad 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -23,7 +23,7 @@ use crate::progress::EmbedderStats; use crate::prompt::Prompt; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; -use crate::vector::db::{EmbedderInfo, EmbeddingStatus, EmbeddingStatusDelta}; +use crate::vector::db::{EmbedderInfo, EmbeddingStatusDelta}; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; use crate::vector::extractor::{Extractor, ExtractorDiff, RequestFragmentExtractor}; use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState}; @@ -441,6 +441,8 @@ pub fn extract_vector_points( { let embedder_is_manual = matches!(*runtime.embedder, Embedder::UserProvided(_)); + let (old_is_user_provided, old_must_regenerate) = + embedder_info.embedding_status.is_user_provided_must_regenerate(docid); let (old, new) = parsed_vectors.remove(embedder_name); let new_must_regenerate = new.must_regenerate(); let delta = match action { @@ -499,16 +501,19 @@ pub fn extract_vector_points( let is_adding_fragments = has_fragments && !old_has_fragments; - if is_adding_fragments { + if !has_fragments { + // removing fragments + regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)? + } else if is_adding_fragments || + // regenerate all fragments when going from user provided to ! user provided + old_is_user_provided + { regenerate_all_fragments( runtime.fragments(), &doc_alloc, new_fields_ids_map, obkv, ) - } else if !has_fragments { - // removing fragments - regenerate_prompt(obkv, &runtime.document_template, new_fields_ids_map)? } else { let mut fragment_diff = Vec::new(); let new_fields_ids_map = new_fields_ids_map.as_fields_ids_map(); @@ -600,7 +605,8 @@ pub fn extract_vector_points( docid, &delta, new_must_regenerate, - &embedder_info.embedding_status, + old_is_user_provided, + old_must_regenerate, ); // and we finally push the unique vectors into the writer @@ -657,10 +663,9 @@ fn push_embedding_status_delta( docid: DocumentId, delta: &VectorStateDelta, new_must_regenerate: bool, - embedding_status: &EmbeddingStatus, + old_is_user_provided: bool, + old_must_regenerate: bool, ) { - let (old_is_user_provided, old_must_regenerate) = - embedding_status.is_user_provided_must_regenerate(docid); let new_is_user_provided = match delta { VectorStateDelta::NoChange => old_is_user_provided, VectorStateDelta::NowRemoved => {