Regenerate embeddings more often:

- When `regenerate` was previously `false` and became `true`
- When rendering the old version of the docs failed
This commit is contained in:
Louis Dureuil 2025-06-12 15:41:53 +02:00
parent 933e319364
commit 396d76046d
No known key found for this signature in database

View File

@ -141,17 +141,31 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
context.new_fields_ids_map,
&context.doc_alloc,
)?;
let old_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
context.index,
context.db_fields_ids_map,
)?,
context.new_fields_ids_map,
&context.doc_alloc,
)?;
if new_rendered != old_rendered {
let must_regenerate = if !old_vectors.regenerate {
// we just enabled `regenerate`
true
} else {
let old_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
context.index,
context.db_fields_ids_map,
)?,
context.new_fields_ids_map,
&context.doc_alloc,
);
if let Ok(old_rendered) = old_rendered {
// must regenerate if the rendered changed
new_rendered != old_rendered
} else {
// cannot check previous rendered, better regenerate
true
}
};
if must_regenerate {
chunks.set_autogenerated(
update.docid(),
update.external_document_id(),