From 24e94b28c1c4ae30e4d3256f3af896b8446ba722 Mon Sep 17 00:00:00 2001 From: nnethercott Date: Mon, 26 May 2025 09:22:20 +0200 Subject: [PATCH] feat: uncouple geo extraction from full doc --- .../new/extract/faceted/extract_facets.rs | 248 ++++++++++++------ .../new/extract/faceted/facet_document.rs | 29 +- 2 files changed, 188 insertions(+), 89 deletions(-) diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 01cfe338f..aa7510863 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -8,7 +8,7 @@ use hashbrown::HashMap; use serde_json::Value; use super::super::cache::BalancedCaches; -use super::facet_document::extract_document_facets; +use super::facet_document::{extract_document_facets, extract_geo_document}; use super::FacetKind; use crate::fields_ids_map::metadata::Metadata; use crate::filterable_attributes_rules::match_faceted_field; @@ -90,53 +90,8 @@ impl FacetedDocidsExtractor { let mut cached_sorter = context.data.borrow_mut_or_yield(); let mut del_add_facet_value = DelAddFacetValue::new(&context.doc_alloc); let docid = document_change.docid(); - let res = match document_change { - DocumentChange::Deletion(inner) => extract_document_facets( - inner.current(rtxn, index, context.db_fields_ids_map)?, - inner.external_document_id(), - new_fields_ids_map.deref_mut(), - filterable_attributes, - sortable_fields, - asc_desc_fields, - distinct_field, - is_geo_enabled, - &mut |fid, meta, depth, value| { - Self::facet_fn_with_options( - &context.doc_alloc, - cached_sorter.deref_mut(), - BalancedCaches::insert_del_u32, - &mut del_add_facet_value, - DelAddFacetValue::insert_del, - docid, - fid, - meta, - filterable_attributes, - depth, - value, - ) - }, - ), - DocumentChange::Update(inner) => { - let has_changed = inner.has_changed_for_fields( - &mut |field_name| { - match_faceted_field( - field_name, - filterable_attributes, - sortable_fields, - asc_desc_fields, - distinct_field, - ) - }, - rtxn, - index, - context.db_fields_ids_map, - )?; - let has_changed_for_geo_fields = - inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?; - if !has_changed && !has_changed_for_geo_fields { - return Ok(()); - } - + match document_change { + DocumentChange::Deletion(inner) => { extract_document_facets( inner.current(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), @@ -145,7 +100,6 @@ impl FacetedDocidsExtractor { sortable_fields, asc_desc_fields, distinct_field, - is_geo_enabled, &mut |fid, meta, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, @@ -163,15 +117,155 @@ impl FacetedDocidsExtractor { }, )?; + if is_geo_enabled { + extract_geo_document( + inner.current(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), + new_fields_ids_map.deref_mut(), + asc_desc_fields, + &mut |fid, meta, depth, value| { + Self::facet_fn_with_options( + &context.doc_alloc, + cached_sorter.deref_mut(), + BalancedCaches::insert_del_u32, + &mut del_add_facet_value, + DelAddFacetValue::insert_del, + docid, + fid, + meta, + filterable_attributes, + depth, + value, + ) + }, + )?; + } + } + DocumentChange::Update(inner) => { + let has_changed = inner.has_changed_for_fields( + &mut |field_name| { + match_faceted_field( + field_name, + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + ) + }, + rtxn, + index, + context.db_fields_ids_map, + )?; + let has_changed_for_geo_fields = + inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?; + + if has_changed { + extract_document_facets( + inner.current(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), + new_fields_ids_map.deref_mut(), + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + &mut |fid, meta, depth, value| { + Self::facet_fn_with_options( + &context.doc_alloc, + cached_sorter.deref_mut(), + BalancedCaches::insert_del_u32, + &mut del_add_facet_value, + DelAddFacetValue::insert_del, + docid, + fid, + meta, + filterable_attributes, + depth, + value, + ) + }, + )?; + + extract_document_facets( + inner.merged(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), + new_fields_ids_map.deref_mut(), + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + &mut |fid, meta, depth, value| { + Self::facet_fn_with_options( + &context.doc_alloc, + cached_sorter.deref_mut(), + BalancedCaches::insert_add_u32, + &mut del_add_facet_value, + DelAddFacetValue::insert_add, + docid, + fid, + meta, + filterable_attributes, + depth, + value, + ) + }, + )?; + } + + if is_geo_enabled && has_changed_for_geo_fields{ + extract_geo_document( + inner.current(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), + new_fields_ids_map.deref_mut(), + asc_desc_fields, + &mut |fid, meta, depth, value| { + Self::facet_fn_with_options( + &context.doc_alloc, + cached_sorter.deref_mut(), + BalancedCaches::insert_del_u32, + &mut del_add_facet_value, + DelAddFacetValue::insert_del, + docid, + fid, + meta, + filterable_attributes, + depth, + value, + ) + }, + )?; + + extract_geo_document( + inner.merged(rtxn, index, context.db_fields_ids_map)?, + inner.external_document_id(), + new_fields_ids_map.deref_mut(), + asc_desc_fields, + &mut |fid, meta, depth, value| { + Self::facet_fn_with_options( + &context.doc_alloc, + cached_sorter.deref_mut(), + BalancedCaches::insert_add_u32, + &mut del_add_facet_value, + DelAddFacetValue::insert_add, + docid, + fid, + meta, + filterable_attributes, + depth, + value, + ) + }, + )?; + } + } + DocumentChange::Insertion(inner) => { extract_document_facets( - inner.merged(rtxn, index, context.db_fields_ids_map)?, + inner.inserted(), inner.external_document_id(), new_fields_ids_map.deref_mut(), filterable_attributes, sortable_fields, asc_desc_fields, distinct_field, - is_geo_enabled, &mut |fid, meta, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, @@ -187,37 +281,35 @@ impl FacetedDocidsExtractor { value, ) }, - ) + )?; + if is_geo_enabled { + extract_geo_document( + inner.inserted(), + inner.external_document_id(), + new_fields_ids_map.deref_mut(), + asc_desc_fields, + &mut |fid, meta, depth, value| { + Self::facet_fn_with_options( + &context.doc_alloc, + cached_sorter.deref_mut(), + BalancedCaches::insert_add_u32, + &mut del_add_facet_value, + DelAddFacetValue::insert_add, + docid, + fid, + meta, + filterable_attributes, + depth, + value, + ) + }, + )?; + } } - DocumentChange::Insertion(inner) => extract_document_facets( - inner.inserted(), - inner.external_document_id(), - new_fields_ids_map.deref_mut(), - filterable_attributes, - sortable_fields, - asc_desc_fields, - distinct_field, - is_geo_enabled, - &mut |fid, meta, depth, value| { - Self::facet_fn_with_options( - &context.doc_alloc, - cached_sorter.deref_mut(), - BalancedCaches::insert_add_u32, - &mut del_add_facet_value, - DelAddFacetValue::insert_add, - docid, - fid, - meta, - filterable_attributes, - depth, - value, - ) - }, - ), }; del_add_facet_value.send_data(docid, sender, &context.doc_alloc).unwrap(); - res + Ok(()) } #[allow(clippy::too_many_arguments)] diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs index e74131402..30a5c462e 100644 --- a/crates/milli/src/update/new/extract/faceted/facet_document.rs +++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs @@ -22,7 +22,6 @@ pub fn extract_document_facets<'doc>( sortable_fields: &HashSet, asc_desc_fields: &HashSet, distinct_field: &Option, - is_geo_enabled: bool, facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>, ) -> Result<()> { // return the match result for the given field name. @@ -102,17 +101,25 @@ pub fn extract_document_facets<'doc>( } } - if is_geo_enabled { - if let Some(geo_value) = document.geo_field()? { - if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? { - let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map - .id_with_metadata_or_insert("_geo.lat") - .zip(field_id_map.id_with_metadata_or_insert("_geo.lng")) - .ok_or(UserError::AttributeLimitReached)?; + Ok(()) +} - facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?; - facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?; - } +pub fn extract_geo_document<'doc>( + document: impl Document<'doc>, + external_document_id: &str, + field_id_map: &mut GlobalFieldsIdsMap, + asc_desc_fields: &HashSet, + facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>, +) -> Result<()> { + if let Some(geo_value) = document.geo_field()? { + if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? { + let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map + .id_with_metadata_or_insert("_geo.lat") + .zip(field_id_map.id_with_metadata_or_insert("_geo.lng")) + .ok_or(UserError::AttributeLimitReached)?; + + facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?; + facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?; } }