Regenerate embeddings more often:

- When `regenerate` was previously `false` and became `true`
- When rendering the old version of the document failed (the previous rendering cannot be compared, so it is safer to regenerate)
This commit is contained in:
Louis Dureuil 2025-06-12 15:41:53 +02:00
parent 933e319364
commit 396d76046d
No known key found for this signature in database

View File

@ -141,17 +141,31 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
context.new_fields_ids_map, context.new_fields_ids_map,
&context.doc_alloc, &context.doc_alloc,
)?; )?;
let old_rendered = prompt.render_document( let must_regenerate = if !old_vectors.regenerate {
update.external_document_id(), // we just enabled `regenerate`
update.merged( true
&context.rtxn, } else {
context.index, let old_rendered = prompt.render_document(
context.db_fields_ids_map, update.external_document_id(),
)?, update.merged(
context.new_fields_ids_map, &context.rtxn,
&context.doc_alloc, context.index,
)?; context.db_fields_ids_map,
if new_rendered != old_rendered { )?,
context.new_fields_ids_map,
&context.doc_alloc,
);
if let Ok(old_rendered) = old_rendered {
// must regenerate if the rendered changed
new_rendered != old_rendered
} else {
// cannot check previous rendered, better regenerate
true
}
};
if must_regenerate {
chunks.set_autogenerated( chunks.set_autogenerated(
update.docid(), update.docid(),
update.external_document_id(), update.external_document_id(),