mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Refactor Document indexing process (Facets)
**Changes:** The document changes now take a selector closure instead of a list of fields to match the fields to extract. The seek_leaf_values_in_object function now uses a selector closure instead of a list of fields to match the fields to extract. The facet database extraction now relies on the FilterableAttributesRule to match the fields to extract. The facet-search database extraction now relies on the FieldIdMapWithMetadata to select the fields to index. The facet level database extraction now relies on the FieldIdMapWithMetadata to select the fields to index. **Important:** Because the filterable attributes are now patterns, the fieldIdMap will only register the fields that exist in at least one document. If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields. **Impact:** - Document Addition/modification facet indexing - Document deletion facet indexing
This commit is contained in:
		| @@ -4,10 +4,10 @@ use heed::RoTxn; | |||||||
| use super::document::{ | use super::document::{ | ||||||
|     Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, |     Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, | ||||||
| }; | }; | ||||||
| use super::extract::perm_json_p; |  | ||||||
| use super::vector_document::{ | use super::vector_document::{ | ||||||
|     MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, |     MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, | ||||||
| }; | }; | ||||||
|  | use crate::attribute_patterns::PatternMatch; | ||||||
| use crate::documents::FieldIdMapper; | use crate::documents::FieldIdMapper; | ||||||
| use crate::vector::EmbeddingConfigs; | use crate::vector::EmbeddingConfigs; | ||||||
| use crate::{DocumentId, Index, Result}; | use crate::{DocumentId, Index, Result}; | ||||||
| @@ -173,7 +173,7 @@ impl<'doc> Update<'doc> { | |||||||
|     /// Otherwise `false`. |     /// Otherwise `false`. | ||||||
|     pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( |     pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( | ||||||
|         &self, |         &self, | ||||||
|         fields: Option<&[&str]>, |         selector: &mut impl FnMut(&str) -> PatternMatch, | ||||||
|         rtxn: &'t RoTxn, |         rtxn: &'t RoTxn, | ||||||
|         index: &'t Index, |         index: &'t Index, | ||||||
|         mapper: &'t Mapper, |         mapper: &'t Mapper, | ||||||
| @@ -185,7 +185,7 @@ impl<'doc> Update<'doc> { | |||||||
|         for entry in self.only_changed_fields().iter_top_level_fields() { |         for entry in self.only_changed_fields().iter_top_level_fields() { | ||||||
|             let (key, updated_value) = entry?; |             let (key, updated_value) = entry?; | ||||||
|  |  | ||||||
|             if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { |             if selector(key) == PatternMatch::NoMatch { | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|  |  | ||||||
| @@ -229,7 +229,7 @@ impl<'doc> Update<'doc> { | |||||||
|             for entry in current.iter_top_level_fields() { |             for entry in current.iter_top_level_fields() { | ||||||
|                 let (key, _) = entry?; |                 let (key, _) = entry?; | ||||||
|  |  | ||||||
|                 if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { |                 if selector(key) == PatternMatch::NoMatch { | ||||||
|                     continue; |                     continue; | ||||||
|                 } |                 } | ||||||
|                 current_selected_field_count += 1; |                 current_selected_field_count += 1; | ||||||
|   | |||||||
| @@ -5,12 +5,13 @@ use std::ops::DerefMut as _; | |||||||
| use bumpalo::collections::Vec as BVec; | use bumpalo::collections::Vec as BVec; | ||||||
| use bumpalo::Bump; | use bumpalo::Bump; | ||||||
| use hashbrown::HashMap; | use hashbrown::HashMap; | ||||||
| use heed::RoTxn; |  | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use super::super::cache::BalancedCaches; | use super::super::cache::BalancedCaches; | ||||||
| use super::facet_document::extract_document_facets; | use super::facet_document::extract_document_facets; | ||||||
| use super::FacetKind; | use super::FacetKind; | ||||||
|  | use crate::fields_ids_map::metadata::Metadata; | ||||||
|  | use crate::filterable_attributes_rules::match_faceted_field; | ||||||
| use crate::heed_codec::facet::OrderedF64Codec; | use crate::heed_codec::facet::OrderedF64Codec; | ||||||
| use crate::update::del_add::DelAdd; | use crate::update::del_add::DelAdd; | ||||||
| use crate::update::new::channel::FieldIdDocidFacetSender; | use crate::update::new::channel::FieldIdDocidFacetSender; | ||||||
| @@ -23,13 +24,17 @@ use crate::update::new::steps::IndexingStep; | |||||||
| use crate::update::new::thread_local::{FullySend, ThreadLocal}; | use crate::update::new::thread_local::{FullySend, ThreadLocal}; | ||||||
| use crate::update::new::DocumentChange; | use crate::update::new::DocumentChange; | ||||||
| use crate::update::GrenadParameters; | use crate::update::GrenadParameters; | ||||||
| use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH}; | use crate::{DocumentId, FieldId, FilterableAttributesRule, Result, MAX_FACET_VALUE_LENGTH}; | ||||||
|  |  | ||||||
| pub struct FacetedExtractorData<'a, 'b> { | pub struct FacetedExtractorData<'a, 'b> { | ||||||
|     attributes_to_extract: &'a [&'a str], |  | ||||||
|     sender: &'a FieldIdDocidFacetSender<'a, 'b>, |     sender: &'a FieldIdDocidFacetSender<'a, 'b>, | ||||||
|     grenad_parameters: &'a GrenadParameters, |     grenad_parameters: &'a GrenadParameters, | ||||||
|     buckets: usize, |     buckets: usize, | ||||||
|  |     filterable_attributes: Vec<FilterableAttributesRule>, | ||||||
|  |     sortable_fields: HashSet<String>, | ||||||
|  |     asc_desc_fields: HashSet<String>, | ||||||
|  |     distinct_field: Option<String>, | ||||||
|  |     is_geo_enabled: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> { | impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> { | ||||||
| @@ -52,7 +57,11 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> | |||||||
|             let change = change?; |             let change = change?; | ||||||
|             FacetedDocidsExtractor::extract_document_change( |             FacetedDocidsExtractor::extract_document_change( | ||||||
|                 context, |                 context, | ||||||
|                 self.attributes_to_extract, |                 &self.filterable_attributes, | ||||||
|  |                 &self.sortable_fields, | ||||||
|  |                 &self.asc_desc_fields, | ||||||
|  |                 &self.distinct_field, | ||||||
|  |                 self.is_geo_enabled, | ||||||
|                 change, |                 change, | ||||||
|                 self.sender, |                 self.sender, | ||||||
|             )? |             )? | ||||||
| @@ -64,13 +73,18 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> | |||||||
| pub struct FacetedDocidsExtractor; | pub struct FacetedDocidsExtractor; | ||||||
|  |  | ||||||
| impl FacetedDocidsExtractor { | impl FacetedDocidsExtractor { | ||||||
|  |     #[allow(clippy::too_many_arguments)] | ||||||
|     fn extract_document_change( |     fn extract_document_change( | ||||||
|         context: &DocumentChangeContext<RefCell<BalancedCaches>>, |         context: &DocumentChangeContext<RefCell<BalancedCaches>>, | ||||||
|         attributes_to_extract: &[&str], |         filterable_attributes: &[FilterableAttributesRule], | ||||||
|  |         sortable_fields: &HashSet<String>, | ||||||
|  |         asc_desc_fields: &HashSet<String>, | ||||||
|  |         distinct_field: &Option<String>, | ||||||
|  |         is_geo_enabled: bool, | ||||||
|         document_change: DocumentChange, |         document_change: DocumentChange, | ||||||
|         sender: &FieldIdDocidFacetSender, |         sender: &FieldIdDocidFacetSender, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         let index = &context.index; |         let index = context.index; | ||||||
|         let rtxn = &context.rtxn; |         let rtxn = &context.rtxn; | ||||||
|         let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); |         let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); | ||||||
|         let mut cached_sorter = context.data.borrow_mut_or_yield(); |         let mut cached_sorter = context.data.borrow_mut_or_yield(); | ||||||
| @@ -78,11 +92,15 @@ impl FacetedDocidsExtractor { | |||||||
|         let docid = document_change.docid(); |         let docid = document_change.docid(); | ||||||
|         let res = match document_change { |         let res = match document_change { | ||||||
|             DocumentChange::Deletion(inner) => extract_document_facets( |             DocumentChange::Deletion(inner) => extract_document_facets( | ||||||
|                 attributes_to_extract, |  | ||||||
|                 inner.current(rtxn, index, context.db_fields_ids_map)?, |                 inner.current(rtxn, index, context.db_fields_ids_map)?, | ||||||
|                 inner.external_document_id(), |                 inner.external_document_id(), | ||||||
|                 new_fields_ids_map.deref_mut(), |                 new_fields_ids_map.deref_mut(), | ||||||
|                 &mut |fid, depth, value| { |                 filterable_attributes, | ||||||
|  |                 sortable_fields, | ||||||
|  |                 asc_desc_fields, | ||||||
|  |                 distinct_field, | ||||||
|  |                 is_geo_enabled, | ||||||
|  |                 &mut |fid, meta, depth, value| { | ||||||
|                     Self::facet_fn_with_options( |                     Self::facet_fn_with_options( | ||||||
|                         &context.doc_alloc, |                         &context.doc_alloc, | ||||||
|                         cached_sorter.deref_mut(), |                         cached_sorter.deref_mut(), | ||||||
| @@ -91,6 +109,8 @@ impl FacetedDocidsExtractor { | |||||||
|                         DelAddFacetValue::insert_del, |                         DelAddFacetValue::insert_del, | ||||||
|                         docid, |                         docid, | ||||||
|                         fid, |                         fid, | ||||||
|  |                         meta, | ||||||
|  |                         filterable_attributes, | ||||||
|                         depth, |                         depth, | ||||||
|                         value, |                         value, | ||||||
|                     ) |                     ) | ||||||
| @@ -98,7 +118,15 @@ impl FacetedDocidsExtractor { | |||||||
|             ), |             ), | ||||||
|             DocumentChange::Update(inner) => { |             DocumentChange::Update(inner) => { | ||||||
|                 if !inner.has_changed_for_fields( |                 if !inner.has_changed_for_fields( | ||||||
|                     Some(attributes_to_extract), |                     &mut |field_name| { | ||||||
|  |                         match_faceted_field( | ||||||
|  |                             field_name, | ||||||
|  |                             filterable_attributes, | ||||||
|  |                             sortable_fields, | ||||||
|  |                             asc_desc_fields, | ||||||
|  |                             distinct_field, | ||||||
|  |                         ) | ||||||
|  |                     }, | ||||||
|                     rtxn, |                     rtxn, | ||||||
|                     index, |                     index, | ||||||
|                     context.db_fields_ids_map, |                     context.db_fields_ids_map, | ||||||
| @@ -107,11 +135,15 @@ impl FacetedDocidsExtractor { | |||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 extract_document_facets( |                 extract_document_facets( | ||||||
|                     attributes_to_extract, |  | ||||||
|                     inner.current(rtxn, index, context.db_fields_ids_map)?, |                     inner.current(rtxn, index, context.db_fields_ids_map)?, | ||||||
|                     inner.external_document_id(), |                     inner.external_document_id(), | ||||||
|                     new_fields_ids_map.deref_mut(), |                     new_fields_ids_map.deref_mut(), | ||||||
|                     &mut |fid, depth, value| { |                     filterable_attributes, | ||||||
|  |                     sortable_fields, | ||||||
|  |                     asc_desc_fields, | ||||||
|  |                     distinct_field, | ||||||
|  |                     is_geo_enabled, | ||||||
|  |                     &mut |fid, meta, depth, value| { | ||||||
|                         Self::facet_fn_with_options( |                         Self::facet_fn_with_options( | ||||||
|                             &context.doc_alloc, |                             &context.doc_alloc, | ||||||
|                             cached_sorter.deref_mut(), |                             cached_sorter.deref_mut(), | ||||||
| @@ -120,6 +152,8 @@ impl FacetedDocidsExtractor { | |||||||
|                             DelAddFacetValue::insert_del, |                             DelAddFacetValue::insert_del, | ||||||
|                             docid, |                             docid, | ||||||
|                             fid, |                             fid, | ||||||
|  |                             meta, | ||||||
|  |                             filterable_attributes, | ||||||
|                             depth, |                             depth, | ||||||
|                             value, |                             value, | ||||||
|                         ) |                         ) | ||||||
| @@ -127,11 +161,15 @@ impl FacetedDocidsExtractor { | |||||||
|                 )?; |                 )?; | ||||||
|  |  | ||||||
|                 extract_document_facets( |                 extract_document_facets( | ||||||
|                     attributes_to_extract, |  | ||||||
|                     inner.merged(rtxn, index, context.db_fields_ids_map)?, |                     inner.merged(rtxn, index, context.db_fields_ids_map)?, | ||||||
|                     inner.external_document_id(), |                     inner.external_document_id(), | ||||||
|                     new_fields_ids_map.deref_mut(), |                     new_fields_ids_map.deref_mut(), | ||||||
|                     &mut |fid, depth, value| { |                     filterable_attributes, | ||||||
|  |                     sortable_fields, | ||||||
|  |                     asc_desc_fields, | ||||||
|  |                     distinct_field, | ||||||
|  |                     is_geo_enabled, | ||||||
|  |                     &mut |fid, meta, depth, value| { | ||||||
|                         Self::facet_fn_with_options( |                         Self::facet_fn_with_options( | ||||||
|                             &context.doc_alloc, |                             &context.doc_alloc, | ||||||
|                             cached_sorter.deref_mut(), |                             cached_sorter.deref_mut(), | ||||||
| @@ -140,6 +178,8 @@ impl FacetedDocidsExtractor { | |||||||
|                             DelAddFacetValue::insert_add, |                             DelAddFacetValue::insert_add, | ||||||
|                             docid, |                             docid, | ||||||
|                             fid, |                             fid, | ||||||
|  |                             meta, | ||||||
|  |                             filterable_attributes, | ||||||
|                             depth, |                             depth, | ||||||
|                             value, |                             value, | ||||||
|                         ) |                         ) | ||||||
| @@ -147,11 +187,15 @@ impl FacetedDocidsExtractor { | |||||||
|                 ) |                 ) | ||||||
|             } |             } | ||||||
|             DocumentChange::Insertion(inner) => extract_document_facets( |             DocumentChange::Insertion(inner) => extract_document_facets( | ||||||
|                 attributes_to_extract, |  | ||||||
|                 inner.inserted(), |                 inner.inserted(), | ||||||
|                 inner.external_document_id(), |                 inner.external_document_id(), | ||||||
|                 new_fields_ids_map.deref_mut(), |                 new_fields_ids_map.deref_mut(), | ||||||
|                 &mut |fid, depth, value| { |                 filterable_attributes, | ||||||
|  |                 sortable_fields, | ||||||
|  |                 asc_desc_fields, | ||||||
|  |                 distinct_field, | ||||||
|  |                 is_geo_enabled, | ||||||
|  |                 &mut |fid, meta, depth, value| { | ||||||
|                     Self::facet_fn_with_options( |                     Self::facet_fn_with_options( | ||||||
|                         &context.doc_alloc, |                         &context.doc_alloc, | ||||||
|                         cached_sorter.deref_mut(), |                         cached_sorter.deref_mut(), | ||||||
| @@ -160,6 +204,8 @@ impl FacetedDocidsExtractor { | |||||||
|                         DelAddFacetValue::insert_add, |                         DelAddFacetValue::insert_add, | ||||||
|                         docid, |                         docid, | ||||||
|                         fid, |                         fid, | ||||||
|  |                         meta, | ||||||
|  |                         filterable_attributes, | ||||||
|                         depth, |                         depth, | ||||||
|                         value, |                         value, | ||||||
|                     ) |                     ) | ||||||
| @@ -180,9 +226,18 @@ impl FacetedDocidsExtractor { | |||||||
|         facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind), |         facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind), | ||||||
|         docid: DocumentId, |         docid: DocumentId, | ||||||
|         fid: FieldId, |         fid: FieldId, | ||||||
|  |         meta: Metadata, | ||||||
|  |         filterable_attributes: &[FilterableAttributesRule], | ||||||
|         depth: perm_json_p::Depth, |         depth: perm_json_p::Depth, | ||||||
|         value: &Value, |         value: &Value, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|  |         // if the field is not faceted, do nothing | ||||||
|  |         if !meta.is_faceted(filterable_attributes) { | ||||||
|  |             return Ok(()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let features = meta.filterable_attributes_features(filterable_attributes); | ||||||
|  |  | ||||||
|         let mut buffer = BVec::new_in(doc_alloc); |         let mut buffer = BVec::new_in(doc_alloc); | ||||||
|         // Exists |         // Exists | ||||||
|         // key: fid |         // key: fid | ||||||
| @@ -246,7 +301,9 @@ impl FacetedDocidsExtractor { | |||||||
|             } |             } | ||||||
|             // Null |             // Null | ||||||
|             // key: fid |             // key: fid | ||||||
|             Value::Null if depth == perm_json_p::Depth::OnBaseKey => { |             Value::Null | ||||||
|  |                 if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_null() => | ||||||
|  |             { | ||||||
|                 buffer.clear(); |                 buffer.clear(); | ||||||
|                 buffer.push(FacetKind::Null as u8); |                 buffer.push(FacetKind::Null as u8); | ||||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); |                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||||
| @@ -254,19 +311,29 @@ impl FacetedDocidsExtractor { | |||||||
|             } |             } | ||||||
|             // Empty |             // Empty | ||||||
|             // key: fid |             // key: fid | ||||||
|             Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { |             Value::Array(a) | ||||||
|  |                 if a.is_empty() | ||||||
|  |                     && depth == perm_json_p::Depth::OnBaseKey | ||||||
|  |                     && features.is_filterable_empty() => | ||||||
|  |             { | ||||||
|                 buffer.clear(); |                 buffer.clear(); | ||||||
|                 buffer.push(FacetKind::Empty as u8); |                 buffer.push(FacetKind::Empty as u8); | ||||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); |                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||||
|                 cache_fn(cached_sorter, &buffer, docid) |                 cache_fn(cached_sorter, &buffer, docid) | ||||||
|             } |             } | ||||||
|             Value::String(_) if depth == perm_json_p::Depth::OnBaseKey => { |             Value::String(_) | ||||||
|  |                 if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_empty() => | ||||||
|  |             { | ||||||
|                 buffer.clear(); |                 buffer.clear(); | ||||||
|                 buffer.push(FacetKind::Empty as u8); |                 buffer.push(FacetKind::Empty as u8); | ||||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); |                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||||
|                 cache_fn(cached_sorter, &buffer, docid) |                 cache_fn(cached_sorter, &buffer, docid) | ||||||
|             } |             } | ||||||
|             Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { |             Value::Object(o) | ||||||
|  |                 if o.is_empty() | ||||||
|  |                     && depth == perm_json_p::Depth::OnBaseKey | ||||||
|  |                     && features.is_filterable_empty() => | ||||||
|  |             { | ||||||
|                 buffer.clear(); |                 buffer.clear(); | ||||||
|                 buffer.push(FacetKind::Empty as u8); |                 buffer.push(FacetKind::Empty as u8); | ||||||
|                 buffer.extend_from_slice(&fid.to_be_bytes()); |                 buffer.extend_from_slice(&fid.to_be_bytes()); | ||||||
| @@ -276,10 +343,6 @@ impl FacetedDocidsExtractor { | |||||||
|             _ => Ok(()), |             _ => Ok(()), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> { |  | ||||||
|         index.user_defined_faceted_fields(rtxn) |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|  |  | ||||||
| struct DelAddFacetValue<'doc> { | struct DelAddFacetValue<'doc> { | ||||||
| @@ -399,9 +462,11 @@ impl FacetedDocidsExtractor { | |||||||
|     { |     { | ||||||
|         let index = indexing_context.index; |         let index = indexing_context.index; | ||||||
|         let rtxn = index.read_txn()?; |         let rtxn = index.read_txn()?; | ||||||
|         let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; |         let filterable_attributes = index.filterable_attributes_rules(&rtxn)?; | ||||||
|         let attributes_to_extract: Vec<_> = |         let sortable_fields = index.sortable_fields(&rtxn)?; | ||||||
|             attributes_to_extract.iter().map(|s| s.as_ref()).collect(); |         let asc_desc_fields = index.asc_desc_fields(&rtxn)?; | ||||||
|  |         let distinct_field = index.distinct_field(&rtxn)?.map(|s| s.to_string()); | ||||||
|  |         let is_geo_enabled = index.is_geo_enabled(&rtxn)?; | ||||||
|         let datastore = ThreadLocal::new(); |         let datastore = ThreadLocal::new(); | ||||||
|  |  | ||||||
|         { |         { | ||||||
| @@ -410,10 +475,14 @@ impl FacetedDocidsExtractor { | |||||||
|             let _entered = span.enter(); |             let _entered = span.enter(); | ||||||
|  |  | ||||||
|             let extractor = FacetedExtractorData { |             let extractor = FacetedExtractorData { | ||||||
|                 attributes_to_extract: &attributes_to_extract, |  | ||||||
|                 grenad_parameters: indexing_context.grenad_parameters, |                 grenad_parameters: indexing_context.grenad_parameters, | ||||||
|                 buckets: rayon::current_num_threads(), |                 buckets: rayon::current_num_threads(), | ||||||
|                 sender, |                 sender, | ||||||
|  |                 filterable_attributes, | ||||||
|  |                 sortable_fields, | ||||||
|  |                 asc_desc_fields, | ||||||
|  |                 distinct_field, | ||||||
|  |                 is_geo_enabled, | ||||||
|             }; |             }; | ||||||
|             extract( |             extract( | ||||||
|                 document_changes, |                 document_changes, | ||||||
|   | |||||||
| @@ -1,46 +1,80 @@ | |||||||
|  | use std::collections::HashSet; | ||||||
|  |  | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use crate::constants::RESERVED_GEO_FIELD_NAME; | use crate::attribute_patterns::PatternMatch; | ||||||
|  | use crate::fields_ids_map::metadata::Metadata; | ||||||
| use crate::update::new::document::Document; | use crate::update::new::document::Document; | ||||||
| use crate::update::new::extract::geo::extract_geo_coordinates; | use crate::update::new::extract::geo::extract_geo_coordinates; | ||||||
| use crate::update::new::extract::perm_json_p; | use crate::update::new::extract::perm_json_p; | ||||||
| use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError}; | use crate::{ | ||||||
|  |     FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError, | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | use crate::filterable_attributes_rules::match_faceted_field; | ||||||
|  |  | ||||||
|  | #[allow(clippy::too_many_arguments)] | ||||||
| pub fn extract_document_facets<'doc>( | pub fn extract_document_facets<'doc>( | ||||||
|     attributes_to_extract: &[&str], |  | ||||||
|     document: impl Document<'doc>, |     document: impl Document<'doc>, | ||||||
|     external_document_id: &str, |     external_document_id: &str, | ||||||
|     field_id_map: &mut GlobalFieldsIdsMap, |     field_id_map: &mut GlobalFieldsIdsMap, | ||||||
|     facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>, |     filterable_attributes: &[FilterableAttributesRule], | ||||||
|  |     sortable_fields: &HashSet<String>, | ||||||
|  |     asc_desc_fields: &HashSet<String>, | ||||||
|  |     distinct_field: &Option<String>, | ||||||
|  |     is_geo_enabled: bool, | ||||||
|  |     facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|  |     // return the match result for the given field name. | ||||||
|  |     let match_field = |field_name: &str| -> PatternMatch { | ||||||
|  |         match_faceted_field( | ||||||
|  |             field_name, | ||||||
|  |             filterable_attributes, | ||||||
|  |             sortable_fields, | ||||||
|  |             asc_desc_fields, | ||||||
|  |             distinct_field, | ||||||
|  |         ) | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     // extract the field if it is faceted (facet searchable, filterable, sortable) | ||||||
|  |     let mut extract_field = |name: &str, depth: perm_json_p::Depth, value: &Value| -> Result<()> { | ||||||
|  |         match field_id_map.id_with_metadata_or_insert(name) { | ||||||
|  |             Some((field_id, meta)) => { | ||||||
|  |                 facet_fn(field_id, meta, depth, value)?; | ||||||
|  |  | ||||||
|  |                 Ok(()) | ||||||
|  |             } | ||||||
|  |             None => Err(UserError::AttributeLimitReached.into()), | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     for res in document.iter_top_level_fields() { |     for res in document.iter_top_level_fields() { | ||||||
|         let (field_name, value) = res?; |         let (field_name, value) = res?; | ||||||
|  |         let selection = match_field(field_name); | ||||||
|  |  | ||||||
|         let mut tokenize_field = |         // extract the field if it matches a pattern and if it is faceted (facet searchable, filterable, sortable) | ||||||
|             |name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map |         let mut match_and_extract = |name: &str, depth: perm_json_p::Depth, value: &Value| { | ||||||
|                 .id_or_insert(name) |             let selection = match_field(name); | ||||||
|             { |             if selection == PatternMatch::Match { | ||||||
|                 Some(field_id) => facet_fn(field_id, depth, value), |                 extract_field(name, depth, value)?; | ||||||
|                 None => Err(UserError::AttributeLimitReached.into()), |             } | ||||||
|             }; |  | ||||||
|  |  | ||||||
|         // if the current field is searchable or contains a searchable attribute |             Ok(selection) | ||||||
|         let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]); |         }; | ||||||
|         if selection != perm_json_p::Selection::Skip { |  | ||||||
|  |         if selection != PatternMatch::NoMatch { | ||||||
|             // parse json. |             // parse json. | ||||||
|             match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? { |             match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? { | ||||||
|                 Value::Object(object) => { |                 Value::Object(object) => { | ||||||
|                     perm_json_p::seek_leaf_values_in_object( |                     perm_json_p::seek_leaf_values_in_object( | ||||||
|                         &object, |                         &object, | ||||||
|                         Some(attributes_to_extract), |  | ||||||
|                         &[], // skip no attributes |  | ||||||
|                         field_name, |                         field_name, | ||||||
|                         perm_json_p::Depth::OnBaseKey, |                         perm_json_p::Depth::OnBaseKey, | ||||||
|                         &mut tokenize_field, |                         &mut match_and_extract, | ||||||
|                     )?; |                     )?; | ||||||
|  |  | ||||||
|                     if selection == perm_json_p::Selection::Select { |                     if selection == PatternMatch::Match { | ||||||
|                         tokenize_field( |                         extract_field( | ||||||
|                             field_name, |                             field_name, | ||||||
|                             perm_json_p::Depth::OnBaseKey, |                             perm_json_p::Depth::OnBaseKey, | ||||||
|                             &Value::Object(object), |                             &Value::Object(object), | ||||||
| @@ -50,36 +84,34 @@ pub fn extract_document_facets<'doc>( | |||||||
|                 Value::Array(array) => { |                 Value::Array(array) => { | ||||||
|                     perm_json_p::seek_leaf_values_in_array( |                     perm_json_p::seek_leaf_values_in_array( | ||||||
|                         &array, |                         &array, | ||||||
|                         Some(attributes_to_extract), |  | ||||||
|                         &[], // skip no attributes |  | ||||||
|                         field_name, |                         field_name, | ||||||
|                         perm_json_p::Depth::OnBaseKey, |                         perm_json_p::Depth::OnBaseKey, | ||||||
|                         &mut tokenize_field, |                         &mut match_and_extract, | ||||||
|                     )?; |                     )?; | ||||||
|  |  | ||||||
|                     if selection == perm_json_p::Selection::Select { |                     if selection == PatternMatch::Match { | ||||||
|                         tokenize_field( |                         extract_field( | ||||||
|                             field_name, |                             field_name, | ||||||
|                             perm_json_p::Depth::OnBaseKey, |                             perm_json_p::Depth::OnBaseKey, | ||||||
|                             &Value::Array(array), |                             &Value::Array(array), | ||||||
|                         )?; |                         )?; | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?, |                 value => extract_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?, | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) { |     if is_geo_enabled { | ||||||
|         if let Some(geo_value) = document.geo_field()? { |         if let Some(geo_value) = document.geo_field()? { | ||||||
|             if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? { |             if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? { | ||||||
|                 let (lat_fid, lng_fid) = field_id_map |                 let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map | ||||||
|                     .id_or_insert("_geo.lat") |                     .id_with_metadata_or_insert("_geo.lat") | ||||||
|                     .zip(field_id_map.id_or_insert("_geo.lng")) |                     .zip(field_id_map.id_with_metadata_or_insert("_geo.lng")) | ||||||
|                     .ok_or(UserError::AttributeLimitReached)?; |                     .ok_or(UserError::AttributeLimitReached)?; | ||||||
|  |  | ||||||
|                 facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?; |                 facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?; | ||||||
|                 facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?; |                 facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -9,7 +9,6 @@ use heed::RoTxn; | |||||||
| use serde_json::value::RawValue; | use serde_json::value::RawValue; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
|  |  | ||||||
| use crate::constants::RESERVED_GEO_FIELD_NAME; |  | ||||||
| use crate::error::GeoError; | use crate::error::GeoError; | ||||||
| use crate::update::new::document::Document; | use crate::update::new::document::Document; | ||||||
| use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor}; | use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor}; | ||||||
| @@ -29,9 +28,7 @@ impl GeoExtractor { | |||||||
|         index: &Index, |         index: &Index, | ||||||
|         grenad_parameters: GrenadParameters, |         grenad_parameters: GrenadParameters, | ||||||
|     ) -> Result<Option<Self>> { |     ) -> Result<Option<Self>> { | ||||||
|         let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); |         if index.is_geo_enabled(rtxn)? { | ||||||
|         let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); |  | ||||||
|         if is_sortable || is_filterable { |  | ||||||
|             Ok(Some(GeoExtractor { grenad_parameters })) |             Ok(Some(GeoExtractor { grenad_parameters })) | ||||||
|         } else { |         } else { | ||||||
|             Ok(None) |             Ok(None) | ||||||
|   | |||||||
| @@ -5,7 +5,6 @@ mod geo; | |||||||
| mod searchable; | mod searchable; | ||||||
| mod vectors; | mod vectors; | ||||||
|  |  | ||||||
| use bumpalo::Bump; |  | ||||||
| pub use cache::{ | pub use cache::{ | ||||||
|     merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, |     merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, | ||||||
| }; | }; | ||||||
| @@ -15,27 +14,11 @@ pub use geo::*; | |||||||
| pub use searchable::*; | pub use searchable::*; | ||||||
| pub use vectors::EmbeddingExtractor; | pub use vectors::EmbeddingExtractor; | ||||||
|  |  | ||||||
| use super::indexer::document_changes::{DocumentChanges, IndexingContext}; |  | ||||||
| use super::steps::IndexingStep; |  | ||||||
| use super::thread_local::{FullySend, ThreadLocal}; |  | ||||||
| use crate::Result; |  | ||||||
|  |  | ||||||
| pub trait DocidsExtractor { |  | ||||||
|     fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( |  | ||||||
|         document_changes: &DC, |  | ||||||
|         indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, |  | ||||||
|         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>, |  | ||||||
|         step: IndexingStep, |  | ||||||
|     ) -> Result<Vec<BalancedCaches<'extractor>>> |  | ||||||
|     where |  | ||||||
|         MSP: Fn() -> bool + Sync; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| /// TODO move in permissive json pointer | /// TODO move in permissive json pointer | ||||||
| pub mod perm_json_p { | pub mod perm_json_p { | ||||||
|     use serde_json::{Map, Value}; |     use serde_json::{Map, Value}; | ||||||
|  |  | ||||||
|     use crate::Result; |     use crate::{attribute_patterns::PatternMatch, Result}; | ||||||
|     const SPLIT_SYMBOL: char = '.'; |     const SPLIT_SYMBOL: char = '.'; | ||||||
|  |  | ||||||
|     /// Returns `true` if the `selector` match the `key`. |     /// Returns `true` if the `selector` match the `key`. | ||||||
| @@ -68,11 +51,9 @@ pub mod perm_json_p { | |||||||
|  |  | ||||||
|     pub fn seek_leaf_values_in_object( |     pub fn seek_leaf_values_in_object( | ||||||
|         value: &Map<String, Value>, |         value: &Map<String, Value>, | ||||||
|         selectors: Option<&[&str]>, |  | ||||||
|         skip_selectors: &[&str], |  | ||||||
|         base_key: &str, |         base_key: &str, | ||||||
|         base_depth: Depth, |         base_depth: Depth, | ||||||
|         seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, |         seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         if value.is_empty() { |         if value.is_empty() { | ||||||
|             seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?; |             seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?; | ||||||
| @@ -85,40 +66,16 @@ pub mod perm_json_p { | |||||||
|                 format!("{}{}{}", base_key, SPLIT_SYMBOL, key) |                 format!("{}{}{}", base_key, SPLIT_SYMBOL, key) | ||||||
|             }; |             }; | ||||||
|  |  | ||||||
|             // here if the user only specified `doggo` we need to iterate in all the fields of `doggo` |             let selection = seeker(&base_key, Depth::OnBaseKey, value)?; | ||||||
|             // so we check the contained_in on both side |             if selection != PatternMatch::NoMatch { | ||||||
|             let selection = select_field(&base_key, selectors, skip_selectors); |  | ||||||
|             if selection != Selection::Skip { |  | ||||||
|                 match value { |                 match value { | ||||||
|                     Value::Object(object) => { |                     Value::Object(object) => { | ||||||
|                         if selection == Selection::Select { |                         seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker) | ||||||
|                             seeker(&base_key, Depth::OnBaseKey, value)?; |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         seek_leaf_values_in_object( |  | ||||||
|                             object, |  | ||||||
|                             selectors, |  | ||||||
|                             skip_selectors, |  | ||||||
|                             &base_key, |  | ||||||
|                             Depth::OnBaseKey, |  | ||||||
|                             seeker, |  | ||||||
|                         ) |  | ||||||
|                     } |                     } | ||||||
|                     Value::Array(array) => { |                     Value::Array(array) => { | ||||||
|                         if selection == Selection::Select { |                         seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker) | ||||||
|                             seeker(&base_key, Depth::OnBaseKey, value)?; |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         seek_leaf_values_in_array( |  | ||||||
|                             array, |  | ||||||
|                             selectors, |  | ||||||
|                             skip_selectors, |  | ||||||
|                             &base_key, |  | ||||||
|                             Depth::OnBaseKey, |  | ||||||
|                             seeker, |  | ||||||
|                         ) |  | ||||||
|                     } |                     } | ||||||
|                     value => seeker(&base_key, Depth::OnBaseKey, value), |                     _ => Ok(()), | ||||||
|                 }?; |                 }?; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
| @@ -128,11 +85,9 @@ pub mod perm_json_p { | |||||||
|  |  | ||||||
|     pub fn seek_leaf_values_in_array( |     pub fn seek_leaf_values_in_array( | ||||||
|         values: &[Value], |         values: &[Value], | ||||||
|         selectors: Option<&[&str]>, |  | ||||||
|         skip_selectors: &[&str], |  | ||||||
|         base_key: &str, |         base_key: &str, | ||||||
|         base_depth: Depth, |         base_depth: Depth, | ||||||
|         seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, |         seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>, | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         if values.is_empty() { |         if values.is_empty() { | ||||||
|             seeker(base_key, base_depth, &Value::Array(vec![]))?; |             seeker(base_key, base_depth, &Value::Array(vec![]))?; | ||||||
| @@ -140,61 +95,16 @@ pub mod perm_json_p { | |||||||
|  |  | ||||||
|         for value in values { |         for value in values { | ||||||
|             match value { |             match value { | ||||||
|                 Value::Object(object) => seek_leaf_values_in_object( |                 Value::Object(object) => { | ||||||
|                     object, |                     seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker) | ||||||
|                     selectors, |                 } | ||||||
|                     skip_selectors, |                 Value::Array(array) => { | ||||||
|                     base_key, |                     seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker) | ||||||
|                     Depth::InsideArray, |                 } | ||||||
|                     seeker, |                 value => seeker(base_key, Depth::InsideArray, value).map(|_| ()), | ||||||
|                 ), |  | ||||||
|                 Value::Array(array) => seek_leaf_values_in_array( |  | ||||||
|                     array, |  | ||||||
|                     selectors, |  | ||||||
|                     skip_selectors, |  | ||||||
|                     base_key, |  | ||||||
|                     Depth::InsideArray, |  | ||||||
|                     seeker, |  | ||||||
|                 ), |  | ||||||
|                 value => seeker(base_key, Depth::InsideArray, value), |  | ||||||
|             }?; |             }?; | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn select_field( |  | ||||||
|         field_name: &str, |  | ||||||
|         selectors: Option<&[&str]>, |  | ||||||
|         skip_selectors: &[&str], |  | ||||||
|     ) -> Selection { |  | ||||||
|         if skip_selectors.iter().any(|skip_selector| { |  | ||||||
|             contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector) |  | ||||||
|         }) { |  | ||||||
|             Selection::Skip |  | ||||||
|         } else if let Some(selectors) = selectors { |  | ||||||
|             let mut selection = Selection::Skip; |  | ||||||
|             for selector in selectors { |  | ||||||
|                 if contained_in(field_name, selector) { |  | ||||||
|                     selection = Selection::Select; |  | ||||||
|                     break; |  | ||||||
|                 } else if contained_in(selector, field_name) { |  | ||||||
|                     selection = Selection::Parent; |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             selection |  | ||||||
|         } else { |  | ||||||
|             Selection::Select |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     #[derive(Debug, Clone, Copy, PartialEq, Eq)] |  | ||||||
|     pub enum Selection { |  | ||||||
|         /// The field is a parent of the of a nested field that must be selected |  | ||||||
|         Parent, |  | ||||||
|         /// The field must be selected |  | ||||||
|         Select, |  | ||||||
|         /// The field must be skipped |  | ||||||
|         Skip, |  | ||||||
|     } |  | ||||||
| } | } | ||||||
|   | |||||||
| @@ -9,12 +9,14 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn}; | |||||||
|  |  | ||||||
| use super::fst_merger_builder::FstMergerBuilder; | use super::fst_merger_builder::FstMergerBuilder; | ||||||
| use super::KvReaderDelAdd; | use super::KvReaderDelAdd; | ||||||
|  | use crate::attribute_patterns::PatternMatch; | ||||||
| use crate::heed_codec::facet::FacetGroupKey; | use crate::heed_codec::facet::FacetGroupKey; | ||||||
| use crate::update::del_add::{DelAdd, KvWriterDelAdd}; | use crate::update::del_add::{DelAdd, KvWriterDelAdd}; | ||||||
| use crate::update::{create_sorter, MergeDeladdBtreesetString}; | use crate::update::{create_sorter, MergeDeladdBtreesetString}; | ||||||
| use crate::{ | use crate::{ | ||||||
|     BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result, |     BEU16StrCodec, FieldId, FieldIdMapMissingEntry, FilterableAttributesFeatures, | ||||||
|     MAX_FACET_VALUE_LENGTH, |     FilterableAttributesRule, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule, | ||||||
|  |     Result, MAX_FACET_VALUE_LENGTH, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub struct FacetSearchBuilder<'indexer> { | pub struct FacetSearchBuilder<'indexer> { | ||||||
| @@ -22,6 +24,7 @@ pub struct FacetSearchBuilder<'indexer> { | |||||||
|     normalized_facet_string_docids_sorter: Sorter<MergeDeladdBtreesetString>, |     normalized_facet_string_docids_sorter: Sorter<MergeDeladdBtreesetString>, | ||||||
|     global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, |     global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, | ||||||
|     localized_attributes_rules: Vec<LocalizedAttributesRule>, |     localized_attributes_rules: Vec<LocalizedAttributesRule>, | ||||||
|  |     filterable_attributes_rules: Vec<FilterableAttributesRule>, | ||||||
|     // Buffered data below |     // Buffered data below | ||||||
|     buffer: Vec<u8>, |     buffer: Vec<u8>, | ||||||
|     localized_field_ids: HashMap<FieldId, Option<Vec<Language>>>, |     localized_field_ids: HashMap<FieldId, Option<Vec<Language>>>, | ||||||
| @@ -31,6 +34,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { | |||||||
|     pub fn new( |     pub fn new( | ||||||
|         global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, |         global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, | ||||||
|         localized_attributes_rules: Vec<LocalizedAttributesRule>, |         localized_attributes_rules: Vec<LocalizedAttributesRule>, | ||||||
|  |         filterable_attributes_rules: Vec<FilterableAttributesRule>, | ||||||
|     ) -> Self { |     ) -> Self { | ||||||
|         let registered_facets = HashMap::new(); |         let registered_facets = HashMap::new(); | ||||||
|         let normalized_facet_string_docids_sorter = create_sorter( |         let normalized_facet_string_docids_sorter = create_sorter( | ||||||
| @@ -49,6 +53,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { | |||||||
|             buffer: Vec::new(), |             buffer: Vec::new(), | ||||||
|             global_fields_ids_map, |             global_fields_ids_map, | ||||||
|             localized_attributes_rules, |             localized_attributes_rules, | ||||||
|  |             filterable_attributes_rules, | ||||||
|             localized_field_ids: HashMap::new(), |             localized_field_ids: HashMap::new(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -60,6 +65,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> { | |||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         let FacetGroupKey { field_id, level: _level, left_bound } = facet_key; |         let FacetGroupKey { field_id, level: _level, left_bound } = facet_key; | ||||||
|  |  | ||||||
|  |         let filterable_attributes_features = self.filterable_attributes_features(field_id)?; | ||||||
|  |  | ||||||
|  |         // if facet search is disabled, we don't need to register the facet | ||||||
|  |         if !filterable_attributes_features.is_facet_searchable() { | ||||||
|  |             return Ok(()); | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         if deladd == DelAdd::Addition { |         if deladd == DelAdd::Addition { | ||||||
|             self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1); |             self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1); | ||||||
|         } |         } | ||||||
| @@ -83,6 +95,24 @@ impl<'indexer> FacetSearchBuilder<'indexer> { | |||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn filterable_attributes_features( | ||||||
|  |         &mut self, | ||||||
|  |         field_id: u16, | ||||||
|  |     ) -> Result<FilterableAttributesFeatures> { | ||||||
|  |         let Some(filterable_attributes_features) = | ||||||
|  |             self.global_fields_ids_map.metadata(field_id).map(|metadata| { | ||||||
|  |                 metadata.filterable_attributes_features(&self.filterable_attributes_rules) | ||||||
|  |             }) | ||||||
|  |         else { | ||||||
|  |             return Err(InternalError::FieldIdMapMissingEntry(FieldIdMapMissingEntry::FieldId { | ||||||
|  |                 field_id, | ||||||
|  |                 process: "facet_search_builder::register_from_key", | ||||||
|  |             }) | ||||||
|  |             .into()); | ||||||
|  |         }; | ||||||
|  |         Ok(filterable_attributes_features) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> { |     fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> { | ||||||
|         if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) { |         if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) { | ||||||
|             let Some(field_name) = self.global_fields_ids_map.name(field_id) else { |             let Some(field_name) = self.global_fields_ids_map.name(field_id) else { | ||||||
| @@ -92,7 +122,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { | |||||||
|             let locales = self |             let locales = self | ||||||
|                 .localized_attributes_rules |                 .localized_attributes_rules | ||||||
|                 .iter() |                 .iter() | ||||||
|                 .find(|rule| rule.match_str(field_name)) |                 .find(|rule| rule.match_str(field_name) == PatternMatch::Match) | ||||||
|                 .map(|rule| rule.locales.clone()); |                 .map(|rule| rule.locales.clone()); | ||||||
|  |  | ||||||
|             e.insert(locales); |             e.insert(locales); | ||||||
|   | |||||||
| @@ -33,10 +33,8 @@ where | |||||||
| { | { | ||||||
|     let index = indexing_context.index; |     let index = indexing_context.index; | ||||||
|     indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); |     indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); | ||||||
|     if index.facet_search(wtxn)? { |     compute_facet_level_database(index, wtxn, facet_field_ids_delta, &global_fields_ids_map)?; | ||||||
|         compute_facet_search_database(index, wtxn, global_fields_ids_map)?; |     compute_facet_search_database(index, wtxn, global_fields_ids_map)?; | ||||||
|     } |  | ||||||
|     compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; |  | ||||||
|     indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); |     indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); | ||||||
|     if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { |     if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { | ||||||
|         compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?; |         compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?; | ||||||
| @@ -116,10 +114,18 @@ fn compute_facet_search_database( | |||||||
|     global_fields_ids_map: GlobalFieldsIdsMap, |     global_fields_ids_map: GlobalFieldsIdsMap, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
|  |  | ||||||
|  |     // if the facet search is not enabled, we can skip the rest of the function | ||||||
|  |     if !index.facet_search(wtxn)? { | ||||||
|  |         return Ok(()); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; |     let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; | ||||||
|  |     let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?; | ||||||
|     let mut facet_search_builder = FacetSearchBuilder::new( |     let mut facet_search_builder = FacetSearchBuilder::new( | ||||||
|         global_fields_ids_map, |         global_fields_ids_map, | ||||||
|         localized_attributes_rules.unwrap_or_default(), |         localized_attributes_rules.unwrap_or_default(), | ||||||
|  |         filterable_attributes_rules, | ||||||
|     ); |     ); | ||||||
|  |  | ||||||
|     let previous_facet_id_string_docids = index |     let previous_facet_id_string_docids = index | ||||||
| @@ -164,8 +170,19 @@ fn compute_facet_level_database( | |||||||
|     index: &Index, |     index: &Index, | ||||||
|     wtxn: &mut RwTxn, |     wtxn: &mut RwTxn, | ||||||
|     mut facet_field_ids_delta: FacetFieldIdsDelta, |     mut facet_field_ids_delta: FacetFieldIdsDelta, | ||||||
|  |     global_fields_ids_map: &GlobalFieldsIdsMap, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|  |     let rtxn = index.read_txn()?; | ||||||
|  |     let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?; | ||||||
|     for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() { |     for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() { | ||||||
|  |         // skip field ids that should not be facet leveled | ||||||
|  |         let Some(metadata) = global_fields_ids_map.metadata(fid) else { | ||||||
|  |             continue; | ||||||
|  |         }; | ||||||
|  |         if !metadata.require_facet_level_database(&filterable_attributes_rules) { | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); |         let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); | ||||||
|         let _entered = span.enter(); |         let _entered = span.enter(); | ||||||
|         match delta { |         match delta { | ||||||
|   | |||||||
| @@ -137,7 +137,6 @@ pub(super) fn update_index( | |||||||
|         index.put_primary_key(wtxn, new_primary_key.name())?; |         index.put_primary_key(wtxn, new_primary_key.name())?; | ||||||
|     } |     } | ||||||
|     let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?; |     let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?; | ||||||
|     inner_index_settings.recompute_facets(wtxn, index)?; |  | ||||||
|     inner_index_settings.recompute_searchables(wtxn, index)?; |     inner_index_settings.recompute_searchables(wtxn, index)?; | ||||||
|     index.put_field_distribution(wtxn, &field_distribution)?; |     index.put_field_distribution(wtxn, &field_distribution)?; | ||||||
|     index.put_documents_ids(wtxn, &document_ids)?; |     index.put_documents_ids(wtxn, &document_ids)?; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user