mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Add depth to facet extraction so that null inside an array doesn't mark the entire field as null
This commit is contained in:
		
				
					committed by
					
						 Clément Renault
						Clément Renault
					
				
			
			
				
	
			
			
			
						parent
						
							50d1bd01df
						
					
				
				
					commit
					8049df125b
				
			| @@ -14,6 +14,7 @@ use super::FacetKind; | ||||
| use crate::heed_codec::facet::OrderedF64Codec; | ||||
| use crate::update::del_add::DelAdd; | ||||
| use crate::update::new::channel::FieldIdDocidFacetSender; | ||||
| use crate::update::new::extract::perm_json_p; | ||||
| use crate::update::new::indexer::document_changes::{ | ||||
|     extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress, | ||||
| }; | ||||
| @@ -81,7 +82,7 @@ impl FacetedDocidsExtractor { | ||||
|                 inner.current(rtxn, index, context.db_fields_ids_map)?, | ||||
|                 inner.external_document_id(), | ||||
|                 new_fields_ids_map.deref_mut(), | ||||
|                 &mut |fid, value| { | ||||
|                 &mut |fid, depth, value| { | ||||
|                     Self::facet_fn_with_options( | ||||
|                         &context.doc_alloc, | ||||
|                         cached_sorter.deref_mut(), | ||||
| @@ -90,6 +91,7 @@ impl FacetedDocidsExtractor { | ||||
|                         DelAddFacetValue::insert_del, | ||||
|                         docid, | ||||
|                         fid, | ||||
|                         depth, | ||||
|                         value, | ||||
|                     ) | ||||
|                 }, | ||||
| @@ -100,7 +102,7 @@ impl FacetedDocidsExtractor { | ||||
|                     inner.current(rtxn, index, context.db_fields_ids_map)?, | ||||
|                     inner.external_document_id(), | ||||
|                     new_fields_ids_map.deref_mut(), | ||||
|                     &mut |fid, value| { | ||||
|                     &mut |fid, depth, value| { | ||||
|                         Self::facet_fn_with_options( | ||||
|                             &context.doc_alloc, | ||||
|                             cached_sorter.deref_mut(), | ||||
| @@ -109,6 +111,7 @@ impl FacetedDocidsExtractor { | ||||
|                             DelAddFacetValue::insert_del, | ||||
|                             docid, | ||||
|                             fid, | ||||
|                             depth, | ||||
|                             value, | ||||
|                         ) | ||||
|                     }, | ||||
| @@ -119,7 +122,7 @@ impl FacetedDocidsExtractor { | ||||
|                     inner.merged(rtxn, index, context.db_fields_ids_map)?, | ||||
|                     inner.external_document_id(), | ||||
|                     new_fields_ids_map.deref_mut(), | ||||
|                     &mut |fid, value| { | ||||
|                     &mut |fid, depth, value| { | ||||
|                         Self::facet_fn_with_options( | ||||
|                             &context.doc_alloc, | ||||
|                             cached_sorter.deref_mut(), | ||||
| @@ -128,6 +131,7 @@ impl FacetedDocidsExtractor { | ||||
|                             DelAddFacetValue::insert_add, | ||||
|                             docid, | ||||
|                             fid, | ||||
|                             depth, | ||||
|                             value, | ||||
|                         ) | ||||
|                     }, | ||||
| @@ -138,7 +142,7 @@ impl FacetedDocidsExtractor { | ||||
|                 inner.inserted(), | ||||
|                 inner.external_document_id(), | ||||
|                 new_fields_ids_map.deref_mut(), | ||||
|                 &mut |fid, value| { | ||||
|                 &mut |fid, depth, value| { | ||||
|                     Self::facet_fn_with_options( | ||||
|                         &context.doc_alloc, | ||||
|                         cached_sorter.deref_mut(), | ||||
| @@ -147,6 +151,7 @@ impl FacetedDocidsExtractor { | ||||
|                         DelAddFacetValue::insert_add, | ||||
|                         docid, | ||||
|                         fid, | ||||
|                         depth, | ||||
|                         value, | ||||
|                     ) | ||||
|                 }, | ||||
| @@ -166,6 +171,7 @@ impl FacetedDocidsExtractor { | ||||
|         facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind), | ||||
|         docid: DocumentId, | ||||
|         fid: FieldId, | ||||
|         depth: perm_json_p::Depth, | ||||
|         value: &Value, | ||||
|     ) -> Result<()> { | ||||
|         let mut buffer = BVec::new_in(doc_alloc); | ||||
| @@ -217,7 +223,7 @@ impl FacetedDocidsExtractor { | ||||
|             } | ||||
|             // Null | ||||
|             // key: fid | ||||
|             Value::Null => { | ||||
|             Value::Null if depth == perm_json_p::Depth::OnBaseKey => { | ||||
|                 buffer.clear(); | ||||
|                 buffer.push(FacetKind::Null as u8); | ||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||
| @@ -225,13 +231,13 @@ impl FacetedDocidsExtractor { | ||||
|             } | ||||
|             // Empty | ||||
|             // key: fid | ||||
|             Value::Array(a) if a.is_empty() => { | ||||
|             Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { | ||||
|                 buffer.clear(); | ||||
|                 buffer.push(FacetKind::Empty as u8); | ||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||
|                 cache_fn(cached_sorter, &buffer, docid) | ||||
|             } | ||||
|             Value::Object(o) if o.is_empty() => { | ||||
|             Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { | ||||
|                 buffer.clear(); | ||||
|                 buffer.push(FacetKind::Empty as u8); | ||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||
|   | ||||
| @@ -10,15 +10,18 @@ pub fn extract_document_facets<'doc>( | ||||
|     document: impl Document<'doc>, | ||||
|     external_document_id: &str, | ||||
|     field_id_map: &mut GlobalFieldsIdsMap, | ||||
|     facet_fn: &mut impl FnMut(FieldId, &Value) -> Result<()>, | ||||
|     facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>, | ||||
| ) -> Result<()> { | ||||
|     for res in document.iter_top_level_fields() { | ||||
|         let (field_name, value) = res?; | ||||
|  | ||||
|         let mut tokenize_field = |name: &str, value: &Value| match field_id_map.id_or_insert(name) { | ||||
|             Some(field_id) => facet_fn(field_id, value), | ||||
|             None => Err(UserError::AttributeLimitReached.into()), | ||||
|         }; | ||||
|         let mut tokenize_field = | ||||
|             |name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map | ||||
|                 .id_or_insert(name) | ||||
|             { | ||||
|                 Some(field_id) => facet_fn(field_id, depth, value), | ||||
|                 None => Err(UserError::AttributeLimitReached.into()), | ||||
|             }; | ||||
|  | ||||
|         // if the current field is one of the attributes to extract or contains one of them | ||||
|         if perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]) { | ||||
| @@ -29,6 +32,7 @@ pub fn extract_document_facets<'doc>( | ||||
|                     Some(attributes_to_extract), | ||||
|                     &[], // skip no attributes | ||||
|                     field_name, | ||||
|                     perm_json_p::Depth::OnBaseKey, | ||||
|                     &mut tokenize_field, | ||||
|                 )?, | ||||
|                 Value::Array(array) => perm_json_p::seek_leaf_values_in_array( | ||||
| @@ -36,9 +40,10 @@ pub fn extract_document_facets<'doc>( | ||||
|                     Some(attributes_to_extract), | ||||
|                     &[], // skip no attributes | ||||
|                     field_name, | ||||
|                     perm_json_p::Depth::OnBaseKey, | ||||
|                     &mut tokenize_field, | ||||
|                 )?, | ||||
|                 value => tokenize_field(field_name, &value)?, | ||||
|                 value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?, | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| @@ -51,8 +56,8 @@ pub fn extract_document_facets<'doc>( | ||||
|                     .zip(field_id_map.id_or_insert("_geo.lng")) | ||||
|                     .ok_or(UserError::AttributeLimitReached)?; | ||||
|  | ||||
|                 facet_fn(lat_fid, &lat.into())?; | ||||
|                 facet_fn(lng_fid, &lng.into())?; | ||||
|                 facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?; | ||||
|                 facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|   | ||||
| @@ -59,15 +59,24 @@ pub mod perm_json_p { | ||||
|             && selector[key.len()..].chars().next().map(|c| c == SPLIT_SYMBOL).unwrap_or(true) | ||||
|     } | ||||
|  | ||||
|     #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||
|     pub enum Depth { | ||||
|         /// The value is reached directly through an object key (including a top-level document field) | ||||
|         OnBaseKey, | ||||
|         /// The value is an element of an array stored under the key, not the key's whole value | ||||
|         InsideArray, | ||||
|     } | ||||
|  | ||||
|     pub fn seek_leaf_values_in_object( | ||||
|         value: &Map<String, Value>, | ||||
|         selectors: Option<&[&str]>, | ||||
|         skip_selectors: &[&str], | ||||
|         base_key: &str, | ||||
|         seeker: &mut impl FnMut(&str, &Value) -> Result<()>, | ||||
|         base_depth: Depth, | ||||
|         seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, | ||||
|     ) -> Result<()> { | ||||
|         if value.is_empty() { | ||||
|             seeker(base_key, &Value::Object(Map::with_capacity(0)))?; | ||||
|             seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?; | ||||
|         } | ||||
|  | ||||
|         for (key, value) in value.iter() { | ||||
| @@ -87,6 +96,7 @@ pub mod perm_json_p { | ||||
|                         selectors, | ||||
|                         skip_selectors, | ||||
|                         &base_key, | ||||
|                         Depth::OnBaseKey, | ||||
|                         seeker, | ||||
|                     ), | ||||
|                     Value::Array(array) => seek_leaf_values_in_array( | ||||
| @@ -94,9 +104,10 @@ pub mod perm_json_p { | ||||
|                         selectors, | ||||
|                         skip_selectors, | ||||
|                         &base_key, | ||||
|                         Depth::OnBaseKey, | ||||
|                         seeker, | ||||
|                     ), | ||||
|                     value => seeker(&base_key, value), | ||||
|                     value => seeker(&base_key, Depth::OnBaseKey, value), | ||||
|                 }?; | ||||
|             } | ||||
|         } | ||||
| @@ -109,21 +120,32 @@ pub mod perm_json_p { | ||||
|         selectors: Option<&[&str]>, | ||||
|         skip_selectors: &[&str], | ||||
|         base_key: &str, | ||||
|         seeker: &mut impl FnMut(&str, &Value) -> Result<()>, | ||||
|         base_depth: Depth, | ||||
|         seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, | ||||
|     ) -> Result<()> { | ||||
|         if values.is_empty() { | ||||
|             seeker(base_key, &Value::Array(vec![]))?; | ||||
|             seeker(base_key, base_depth, &Value::Array(vec![]))?; | ||||
|         } | ||||
|  | ||||
|         for value in values { | ||||
|             match value { | ||||
|                 Value::Object(object) => { | ||||
|                     seek_leaf_values_in_object(object, selectors, skip_selectors, base_key, seeker) | ||||
|                 } | ||||
|                 Value::Array(array) => { | ||||
|                     seek_leaf_values_in_array(array, selectors, skip_selectors, base_key, seeker) | ||||
|                 } | ||||
|                 value => seeker(base_key, value), | ||||
|                 Value::Object(object) => seek_leaf_values_in_object( | ||||
|                     object, | ||||
|                     selectors, | ||||
|                     skip_selectors, | ||||
|                     base_key, | ||||
|                     Depth::InsideArray, | ||||
|                     seeker, | ||||
|                 ), | ||||
|                 Value::Array(array) => seek_leaf_values_in_array( | ||||
|                     array, | ||||
|                     selectors, | ||||
|                     skip_selectors, | ||||
|                     base_key, | ||||
|                     Depth::InsideArray, | ||||
|                     seeker, | ||||
|                 ), | ||||
|                 value => seeker(base_key, Depth::InsideArray, value), | ||||
|             }?; | ||||
|         } | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ use serde_json::Value; | ||||
|  | ||||
| use crate::update::new::document::Document; | ||||
| use crate::update::new::extract::perm_json_p::{ | ||||
|     seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, | ||||
|     seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, | ||||
| }; | ||||
| use crate::{ | ||||
|     FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError, | ||||
| @@ -35,7 +35,7 @@ impl<'a> DocumentTokenizer<'a> { | ||||
|         for entry in document.iter_top_level_fields() { | ||||
|             let (field_name, value) = entry?; | ||||
|  | ||||
|             let mut tokenize_field = |field_name: &str, value: &Value| { | ||||
|             let mut tokenize_field = |field_name: &str, _depth, value: &Value| { | ||||
|                 let Some(field_id) = field_id_map.id_or_insert(field_name) else { | ||||
|                     return Err(UserError::AttributeLimitReached.into()); | ||||
|                 }; | ||||
| @@ -96,6 +96,7 @@ impl<'a> DocumentTokenizer<'a> { | ||||
|                         self.attribute_to_extract, | ||||
|                         self.attribute_to_skip, | ||||
|                         field_name, | ||||
|                         Depth::OnBaseKey, | ||||
|                         &mut tokenize_field, | ||||
|                     )?, | ||||
|                     Value::Array(array) => seek_leaf_values_in_array( | ||||
| @@ -103,9 +104,10 @@ impl<'a> DocumentTokenizer<'a> { | ||||
|                         self.attribute_to_extract, | ||||
|                         self.attribute_to_skip, | ||||
|                         field_name, | ||||
|                         Depth::OnBaseKey, | ||||
|                         &mut tokenize_field, | ||||
|                     )?, | ||||
|                     value => tokenize_field(field_name, &value)?, | ||||
|                     value => tokenize_field(field_name, Depth::OnBaseKey, &value)?, | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user