From 396d76046de8adff191410c01cf4dbfb6bd86efd Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 12 Jun 2025 15:41:53 +0200 Subject: [PATCH] Regenerate embeddings more often: - When `regenerate` was previously `false` and became `true` - When rendering the old version of the docs failed --- .../src/update/new/extract/vectors/mod.rs | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index 47bd622ae..0c727fa11 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -141,17 +141,31 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> { context.new_fields_ids_map, &context.doc_alloc, )?; - let old_rendered = prompt.render_document( - update.external_document_id(), - update.merged( - &context.rtxn, - context.index, - context.db_fields_ids_map, - )?, - context.new_fields_ids_map, - &context.doc_alloc, - )?; - if new_rendered != old_rendered { + let must_regenerate = if !old_vectors.regenerate { + // we just enabled `regenerate` + true + } else { + let old_rendered = prompt.render_document( + update.external_document_id(), + update.merged( + &context.rtxn, + context.index, + context.db_fields_ids_map, + )?, + context.new_fields_ids_map, + &context.doc_alloc, + ); + + if let Ok(old_rendered) = old_rendered { + // must regenerate if the rendered changed + new_rendered != old_rendered + } else { + // cannot check previous rendered, better regenerate + true + } + }; + + if must_regenerate { chunks.set_autogenerated( update.docid(), update.external_document_id(),