From bd686991b915570df467716529a52b3daa06fb42 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Sat, 7 Dec 2024 15:14:12 +0100 Subject: [PATCH] Take updates into account to distinguish between values in the DB that are unchanged and values that are deleted --- crates/milli/src/update/new/document.rs | 58 +++++++++------ .../milli/src/update/new/document_change.rs | 9 ++- .../new/extract/faceted/facet_document.rs | 72 +++++++++---------- 3 files changed, 80 insertions(+), 59 deletions(-) diff --git a/crates/milli/src/update/new/document.rs b/crates/milli/src/update/new/document.rs index 4442155c9..d25840204 100644 --- a/crates/milli/src/update/new/document.rs +++ b/crates/milli/src/update/new/document.rs @@ -202,6 +202,7 @@ pub struct MergedDocument<'a, 'doc, 't, Mapper: FieldIdMapper> { pub struct DeltaDocument<'a, 'doc, 't, Mapper: FieldIdMapper> { new_doc: DocumentFromVersions<'a, 'doc>, db_doc: Option>, + has_deletion: bool, } impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { @@ -211,17 +212,18 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { index: &'t Index, db_fields_ids_map: &'t Mapper, new_doc: DocumentFromVersions<'a, 'doc>, + has_deletion: bool, ) -> Result { let db_doc = DocumentFromDb::new(docid, rtxn, index, db_fields_ids_map)?; - Ok(Self { db_doc, new_doc }) + Ok(Self { db_doc, new_doc, has_deletion }) } - pub fn delta_top_level_fields<'d>( + pub fn delta_top_level_fields<'either>( &self, - ) -> impl Iterator)>> + '_ + ) -> impl Iterator)>> + '_ where - 't: 'd, - 'doc: 'd, + 't: 'either, + 'doc: 'either, { match &self.db_doc { // since we'll be returning all db top level fields, it makes more sense to iterate on the db first: @@ -232,6 +234,7 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { let mut db_iter = db_doc.iter_top_level_fields_with_fid(); let fid_map = db_doc.fields_ids_map; let mut seen_fields = RoaringBitmap::new(); + let has_deletion = self.has_deletion; Either::Left(std::iter::from_fn(move || { if let Some(entry) = db_iter.next() { @@ -247,12 +250,22 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { match new_value { Some(new_value) => { - return Some(Ok(( - name, - DeltaValue::CurrentAndUpdated(db_value, new_value), - ))) + if new_value.get() == db_value.get() { + return Some(Ok((name, DeltaValue::Unchanged(new_value)))); + } else { + return Some(Ok(( + name, + DeltaValue::Modified(db_value, new_value), + ))); + } + } + None => { + if has_deletion { + return Some(Ok((name, DeltaValue::Deleted(db_value)))); + } else { + return Some(Ok((name, DeltaValue::Unchanged(db_value)))); + } } - None => return Some(Ok((name, DeltaValue::Current(db_value)))), } } { @@ -265,7 +278,7 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { true })? { Ok((name, new_value)) => { - Some(Ok((name, DeltaValue::Updated(new_value)))) + Some(Ok((name, DeltaValue::Inserted(new_value)))) } Err(err) => Some(Err(err)), } @@ -274,12 +287,12 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { } None => Either::Right(self.new_doc.iter_top_level_fields().map(|res| { let (k, v) = res?; - Ok((k, DeltaValue::Updated(v))) + Ok((k, DeltaValue::Inserted(v))) })), } } - pub fn delta_geo_field(&self) -> Result>> { + pub fn delta_geo_field(&self) -> Result>> { let db_geo_field = match self.db_doc { Some(db) => db.geo_field()?, None => None, @@ -289,9 +302,13 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> DeltaDocument<'a, 'doc, 't, Mapper> { Ok(match (db_geo_field, new_doc_geo_field) { (None, None) => None, - (None, Some(new_doc)) => Some(DeltaValue::Updated(new_doc)), - (Some(db), None) => Some(DeltaValue::Current(db)), - (Some(db), Some(new_doc)) => Some(DeltaValue::CurrentAndUpdated(db, new_doc)), + (None, Some(new_doc)) => Some(DeltaValue::Inserted(new_doc)), + (Some(db), None) => Some(if self.has_deletion { + DeltaValue::Deleted(db) + } else { + DeltaValue::Unchanged(db) + }), + (Some(db), Some(new_doc)) => Some(DeltaValue::Modified(db, new_doc)), }) } } @@ -313,10 +330,11 @@ impl<'a, 'doc, 't, Mapper: FieldIdMapper> MergedDocument<'a, 'doc, 't, Mapper> { } } -pub enum DeltaValue<'t, 'doc> { - Current(&'t RawValue), - Updated(&'doc RawValue), - CurrentAndUpdated(&'t RawValue, &'doc RawValue), +pub enum DeltaValue<'either, 't: 'either, 'doc: 'either> { + Deleted(&'t RawValue), + Inserted(&'doc RawValue), + Modified(&'t RawValue, &'doc RawValue), + Unchanged(&'either RawValue), } impl<'d, 'doc: 'd, 't: 'd, Mapper: FieldIdMapper> Document<'d> diff --git a/crates/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs index abc538533..d6fc9298f 100644 --- a/crates/milli/src/update/new/document_change.rs +++ b/crates/milli/src/update/new/document_change.rs @@ -173,7 +173,14 @@ impl<'doc> Update<'doc> { index: &'t Index, mapper: &'t Mapper, ) -> Result> { - DeltaDocument::new(self.docid, rtxn, index, mapper, DocumentFromVersions::new(&self.new)) + DeltaDocument::new( + self.docid, + rtxn, + index, + mapper, + DocumentFromVersions::new(&self.new), + self.has_deletion, + ) } /// Returns whether the updated version of the document is different from the current version for the passed subset of fields. diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs index 95997efef..39ef1cc74 100644 --- a/crates/milli/src/update/new/extract/faceted/facet_document.rs +++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs @@ -104,14 +104,14 @@ pub fn extract_merged_document_facets<'doc, 'del_add_facet_value, 'cache>( del_add_facet_value: &mut DelAddFacetValue<'del_add_facet_value>, cached_sorter: &mut BalancedCaches<'cache>, field_id_map: &mut GlobalFieldsIdsMap, - facet_fn_current: &mut impl FnMut( + facet_fn_del: &mut impl FnMut( FieldId, perm_json_p::Depth, &Value, &mut DelAddFacetValue<'del_add_facet_value>, &mut BalancedCaches<'cache>, ) -> Result<()>, - facet_fn_updated: &mut impl FnMut( + facet_fn_add: &mut impl FnMut( FieldId, perm_json_p::Depth, &Value, @@ -122,37 +122,34 @@ pub fn extract_merged_document_facets<'doc, 'del_add_facet_value, 'cache>( for res in document.delta_top_level_fields() { let (field_name, value) = res?; match value { - DeltaValue::Current(value) => { + DeltaValue::Deleted(value) => { extract_document_facet( attributes_to_extract, field_id_map, &mut |fid, depth, value| { - facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter) + facet_fn_del(fid, depth, value, del_add_facet_value, cached_sorter) }, field_name, value, )?; } - DeltaValue::Updated(value) => { + DeltaValue::Inserted(value) => { extract_document_facet( attributes_to_extract, field_id_map, &mut |fid, depth, value| { - facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter) + facet_fn_add(fid, depth, value, del_add_facet_value, cached_sorter) }, field_name, value, )?; } - DeltaValue::CurrentAndUpdated(current, updated) => { - if current.get() == updated.get() { - continue; - } + DeltaValue::Modified(current, updated) => { extract_document_facet( attributes_to_extract, field_id_map, &mut |fid, depth, value| { - facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter) + facet_fn_del(fid, depth, value, del_add_facet_value, cached_sorter) }, field_name, current, @@ -161,46 +158,45 @@ pub fn extract_merged_document_facets<'doc, 'del_add_facet_value, 'cache>( attributes_to_extract, field_id_map, &mut |fid, depth, value| { - facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter) + facet_fn_add(fid, depth, value, del_add_facet_value, cached_sorter) }, field_name, updated, )?; } + DeltaValue::Unchanged(_) => {} } } if attributes_to_extract.contains(&"_geo") { match document.delta_geo_field()? { - Some(DeltaValue::Current(current)) => { + Some(DeltaValue::Deleted(deleted)) => { + extract_geo_facet( + external_document_id, + deleted, + field_id_map, + &mut |fid, depth, value| { + facet_fn_del(fid, depth, value, del_add_facet_value, cached_sorter) + }, + )?; + } + Some(DeltaValue::Inserted(inserted)) => { + extract_geo_facet( + external_document_id, + inserted, + field_id_map, + &mut |fid, depth, value| { + facet_fn_add(fid, depth, value, del_add_facet_value, cached_sorter) + }, + )?; + } + Some(DeltaValue::Modified(current, updated)) => { extract_geo_facet( external_document_id, current, field_id_map, &mut |fid, depth, value| { - facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter) - }, - )?; - } - Some(DeltaValue::Updated(updated)) => { - extract_geo_facet( - external_document_id, - updated, - field_id_map, - &mut |fid, depth, value| { - facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter) - }, - )?; - } - Some(DeltaValue::CurrentAndUpdated(current, updated)) - if current.get() != updated.get() => - { - extract_geo_facet( - external_document_id, - current, - field_id_map, - &mut |fid, depth, value| { - facet_fn_current(fid, depth, value, del_add_facet_value, cached_sorter) + facet_fn_del(fid, depth, value, del_add_facet_value, cached_sorter) }, )?; extract_geo_facet( @@ -208,11 +204,11 @@ pub fn extract_merged_document_facets<'doc, 'del_add_facet_value, 'cache>( updated, field_id_map, &mut |fid, depth, value| { - facet_fn_updated(fid, depth, value, del_add_facet_value, cached_sorter) + facet_fn_add(fid, depth, value, del_add_facet_value, cached_sorter) }, )?; } - None | Some(DeltaValue::CurrentAndUpdated(_, _)) => {} + None | Some(DeltaValue::Unchanged(_)) => {} } }