mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Refactor Settings Indexing process
**Changes:** The transform structure now relies on FieldIdMapWithMetadata and AttributePatterns to prepare the obkv documents during a settings reindexing. The InnerIndexSettingsDiff and InnerIndexSettings structs now rely on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the fields and the databases that should be reindexed. The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata. We now rely on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables. The searchable database computing now relies on the FieldIdMapWithMetadata to know if a field is searchable and to retrieve the locales. The facet database computing now relies on the FieldIdMapWithMetadata to compute the facet databases and the facet-search, and to retrieve the locales. The facet level database computing now relies on the FieldIdMapWithMetadata, and the facet level databases are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff). The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata.
**Impact:**
- Dump import
- Settings update
This commit is contained in:
		| @@ -81,6 +81,17 @@ pub enum DelAddOperation { | ||||
|     DeletionAndAddition, | ||||
| } | ||||
|  | ||||
| impl DelAddOperation { | ||||
|     /// Merge two DelAddOperation enum variants. | ||||
|     /// | ||||
|     /// Returns `Deletion` when both operands are `Deletion`, `Addition` when both | ||||
|     /// operands are `Addition`, and `DeletionAndAddition` for every other combination. | ||||
|     pub fn merge(self, other: Self) -> Self { | ||||
|         match (self, other) { | ||||
|             (Self::Deletion, Self::Deletion) => Self::Deletion, | ||||
|             (Self::Addition, Self::Addition) => Self::Addition, | ||||
|             // Mixed operands, or an operand that is already `DeletionAndAddition`. | ||||
|             _ => Self::DeletionAndAddition, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value> | ||||
| /// | ||||
| /// putting each deletion obkv's keys under an DelAdd::Deletion | ||||
|   | ||||
| @@ -6,7 +6,7 @@ use heed::types::Bytes; | ||||
| use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn}; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | ||||
| use super::{clear_facet_levels, FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::{ | ||||
|     FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, | ||||
| @@ -97,9 +97,7 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> { | ||||
| impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|     pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> { | ||||
|         self.update_level0(wtxn)?; | ||||
|         for &field_id in field_ids.iter() { | ||||
|             self.clear_levels(wtxn, field_id)?; | ||||
|         } | ||||
|         clear_facet_levels(wtxn, &self.db.remap_data_type(), field_ids)?; | ||||
|  | ||||
|         for &field_id in field_ids.iter() { | ||||
|             let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?; | ||||
| @@ -114,14 +112,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     /// Deletes from `self.db` every entry of `field_id` whose key falls in the inclusive | ||||
|     /// range going from level 1 to level `u8::MAX`, both bounds using an empty left bound. | ||||
|     /// | ||||
|     /// The value returned by `delete_range` is discarded; its error, if any, is propagated. | ||||
|     fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> { | ||||
|         let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; | ||||
|         let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; | ||||
|         let range = left..=right; | ||||
|         self.db.delete_range(wtxn, &range).map(drop)?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> { | ||||
|         let delta_data = match self.delta_data.take() { | ||||
|             Some(x) => x, | ||||
| @@ -365,8 +355,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> { | ||||
| mod tests { | ||||
|     use std::iter::once; | ||||
|  | ||||
|     use big_s::S; | ||||
|     use maplit::hashset; | ||||
|     use roaring::RoaringBitmap; | ||||
|  | ||||
|     use crate::documents::mmap_from_objects; | ||||
| @@ -374,7 +362,7 @@ mod tests { | ||||
|     use crate::heed_codec::StrRefCodec; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::update::facet::test_helpers::{ordered_string, FacetIndex}; | ||||
|     use crate::{db_snap, milli_snap}; | ||||
|     use crate::{db_snap, milli_snap, FilterableAttributesRule}; | ||||
|  | ||||
|     #[test] | ||||
|     fn insert() { | ||||
| @@ -474,7 +462,8 @@ mod tests { | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_primary_key("id".to_owned()); | ||||
|                 settings.set_filterable_fields(hashset! { S("id") }); | ||||
|                 settings | ||||
|                     .set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_string())]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|   | ||||
| @@ -89,6 +89,7 @@ use time::OffsetDateTime; | ||||
| use tracing::debug; | ||||
|  | ||||
| use self::incremental::FacetsUpdateIncremental; | ||||
| use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; | ||||
| use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps}; | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::{ | ||||
| @@ -147,7 +148,11 @@ impl<'i> FacetsUpdate<'i> { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> { | ||||
|     pub fn execute( | ||||
|         self, | ||||
|         wtxn: &mut heed::RwTxn<'_>, | ||||
|         new_settings: &InnerIndexSettings, | ||||
|     ) -> Result<()> { | ||||
|         if self.data_size == 0 { | ||||
|             return Ok(()); | ||||
|         } | ||||
| @@ -156,8 +161,7 @@ impl<'i> FacetsUpdate<'i> { | ||||
|  | ||||
|         // See self::comparison_bench::benchmark_facet_indexing | ||||
|         if self.data_size >= (self.database.len(wtxn)? / 500) { | ||||
|             let field_ids = | ||||
|                 self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>(); | ||||
|             let field_ids = facet_levels_field_ids(new_settings); | ||||
|             let bulk_update = FacetsUpdateBulk::new( | ||||
|                 self.index, | ||||
|                 field_ids, | ||||
| @@ -291,6 +295,53 @@ fn index_facet_search( | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Clear all the levels greater than 0 for given field ids. | ||||
| /// | ||||
| /// For each id yielded by `field_ids`, one `delete_range` call is issued on `db` over the | ||||
| /// inclusive key range going from level 1 to level `u8::MAX` of that field. | ||||
| /// The first error returned by `delete_range` stops the iteration and is propagated. | ||||
| pub fn clear_facet_levels<'a, I>( | ||||
|     wtxn: &mut heed::RwTxn<'_>, | ||||
|     db: &heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DecodeIgnore>, | ||||
|     field_ids: I, | ||||
| ) -> Result<()> | ||||
| where | ||||
|     I: IntoIterator<Item = &'a FieldId>, | ||||
| { | ||||
|     for field_id in field_ids { | ||||
|         let field_id = *field_id; | ||||
|         // Level 0 is left untouched: the range starts at level 1. | ||||
|         let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; | ||||
|         // NOTE(review): the upper bound uses an empty left bound at level `u8::MAX`; | ||||
|         // presumably no key is ever stored at that level - confirm against the key codec. | ||||
|         let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; | ||||
|         let range = left..=right; | ||||
|         // The value returned by `delete_range` is not needed, only its error. | ||||
|         db.delete_range(wtxn, &range).map(drop)?; | ||||
|     } | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Clear the facet levels of the fields that required a facet level database with the | ||||
| /// old settings but no longer require one with the new settings. | ||||
| /// | ||||
| /// The levels are cleared in both `index.facet_id_string_docids` and | ||||
| /// `index.facet_id_f64_docids`. | ||||
| pub fn clear_facet_levels_based_on_settings_diff( | ||||
|     wtxn: &mut heed::RwTxn<'_>, | ||||
|     index: &Index, | ||||
|     settings_diff: &InnerIndexSettingsDiff, | ||||
| ) -> Result<()> { | ||||
|     let new_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.new); | ||||
|     let old_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.old); | ||||
|  | ||||
|     // Only the ids present in the old set and absent from the new one are cleared. | ||||
|     let field_ids_to_clear: Vec<_> = old_field_ids.difference(&new_field_ids).copied().collect(); | ||||
|     clear_facet_levels(wtxn, &index.facet_id_string_docids.remap_types(), &field_ids_to_clear)?; | ||||
|     clear_facet_levels(wtxn, &index.facet_id_f64_docids.remap_types(), &field_ids_to_clear)?; | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
| /// Collect the ids of the fields of `settings.fields_ids_map` whose metadata reports, | ||||
| /// through `require_facet_level_database`, that a facet level database is required | ||||
| /// under `settings.filterable_attributes_rules`. | ||||
| /// | ||||
| /// The output collection type `B` is chosen by the caller. | ||||
| fn facet_levels_field_ids<B>(settings: &InnerIndexSettings) -> B | ||||
| where | ||||
|     B: FromIterator<FieldId>, | ||||
| { | ||||
|     settings | ||||
|         .fields_ids_map | ||||
|         .iter_id_metadata() | ||||
|         .filter(|(_, metadata)| { | ||||
|             metadata.require_facet_level_database(&settings.filterable_attributes_rules) | ||||
|         }) | ||||
|         .map(|(id, _)| id) | ||||
|         .collect() | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| pub(crate) mod test_helpers { | ||||
|     use std::cell::Cell; | ||||
|   | ||||
| @@ -95,12 +95,7 @@ pub fn enrich_documents_batch<R: Read + Seek>( | ||||
|     // If the settings specifies that a _geo field must be used therefore we must check the | ||||
|     // validity of it in all the documents of this batch and this is when we return `Some`. | ||||
|     let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) { | ||||
|         Some(geo_field_id) | ||||
|             if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) | ||||
|                 || index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) => | ||||
|         { | ||||
|             Some(geo_field_id) | ||||
|         } | ||||
|         Some(geo_field_id) if index.is_geo_enabled(rtxn)? => Some(geo_field_id), | ||||
|         _otherwise => None, | ||||
|     }; | ||||
|  | ||||
|   | ||||
| @@ -150,9 +150,14 @@ fn searchable_fields_changed( | ||||
|     obkv: &KvReader<FieldId>, | ||||
|     settings_diff: &InnerIndexSettingsDiff, | ||||
| ) -> bool { | ||||
|     let searchable_fields = &settings_diff.new.searchable_fields_ids; | ||||
|     for (field_id, field_bytes) in obkv.iter() { | ||||
|         if searchable_fields.contains(&field_id) { | ||||
|         let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else { | ||||
|             // If the field id is not in the fields ids map, skip it. | ||||
|             // This happens for the vectors sub-fields. for example: | ||||
|             // "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered. | ||||
|             continue; | ||||
|         }; | ||||
|         if metadata.is_searchable() { | ||||
|             let del_add = KvReaderDelAdd::from_slice(field_bytes); | ||||
|             match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) { | ||||
|                 // if both fields are None, check the next field. | ||||
| @@ -200,8 +205,14 @@ fn tokens_from_document<'a>( | ||||
|     buffers.obkv_buffer.clear(); | ||||
|     let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer); | ||||
|     for (field_id, field_bytes) in obkv.iter() { | ||||
|         let Some(metadata) = settings.fields_ids_map.metadata(field_id) else { | ||||
|             // If the field id is not in the fields ids map, skip it. | ||||
|             // This happens for the vectors sub-fields. for example: | ||||
|             // "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered. | ||||
|             continue; | ||||
|         }; | ||||
|         // if field is searchable. | ||||
|         if settings.searchable_fields_ids.contains(&field_id) { | ||||
|         if metadata.is_searchable() { | ||||
|             // extract deletion or addition only. | ||||
|             if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) { | ||||
|                 // parse json. | ||||
| @@ -216,7 +227,7 @@ fn tokens_from_document<'a>( | ||||
|                 buffers.field_buffer.clear(); | ||||
|                 if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) { | ||||
|                     // create an iterator of token with their positions. | ||||
|                     let locales = settings.localized_searchable_fields_ids.locales(field_id); | ||||
|                     let locales = metadata.locales(&settings.localized_attributes_rules); | ||||
|                     let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales)) | ||||
|                         .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes); | ||||
|  | ||||
|   | ||||
| @@ -12,12 +12,11 @@ use heed::BytesEncode; | ||||
| use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; | ||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; | ||||
| use crate::heed_codec::{BEU16StrCodec, StrRefCodec}; | ||||
| use crate::localized_attributes_rules::LocalizedFieldIds; | ||||
| use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; | ||||
| use crate::update::index_documents::helpers::{ | ||||
|     MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps, | ||||
| }; | ||||
| use crate::update::settings::InnerIndexSettingsDiff; | ||||
| use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; | ||||
| use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH}; | ||||
|  | ||||
| /// Extracts the facet string and the documents ids where this facet string appear. | ||||
| @@ -33,13 +32,10 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>( | ||||
|     if settings_diff.settings_update_only() { | ||||
|         extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff) | ||||
|     } else { | ||||
|         let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids; | ||||
|         let facet_search = settings_diff.new.facet_search; | ||||
|         extract_facet_string_docids_document_update( | ||||
|             docid_fid_facet_string, | ||||
|             indexer, | ||||
|             localized_field_ids, | ||||
|             facet_search, | ||||
|             &settings_diff.new, | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| @@ -52,8 +48,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>( | ||||
| fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>( | ||||
|     docid_fid_facet_string: grenad::Reader<R>, | ||||
|     indexer: GrenadParameters, | ||||
|     localized_field_ids: &LocalizedFieldIds, | ||||
|     facet_search: bool, | ||||
|     settings: &InnerIndexSettings, | ||||
| ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> { | ||||
|     let max_memory = indexer.max_memory_by_thread(); | ||||
|  | ||||
| @@ -92,6 +87,14 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>( | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(key).unwrap(); | ||||
|         let field_id = FieldId::from_be_bytes(field_id_bytes); | ||||
|  | ||||
|         let Some(metadata) = settings.fields_ids_map.metadata(field_id) else { | ||||
|             unreachable!("metadata not found for field_id: {}", field_id) | ||||
|         }; | ||||
|  | ||||
|         if !metadata.is_faceted(&settings.filterable_attributes_rules) { | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         let (document_id_bytes, normalized_value_bytes) = | ||||
|             try_split_array_at::<_, 4>(bytes).unwrap(); | ||||
|         let document_id = u32::from_be_bytes(document_id_bytes); | ||||
| @@ -99,8 +102,10 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>( | ||||
|         let normalized_value = str::from_utf8(normalized_value_bytes)?; | ||||
|  | ||||
|         // Facet search normalization | ||||
|         if facet_search { | ||||
|             let locales = localized_field_ids.locales(field_id); | ||||
|         let features = | ||||
|             metadata.filterable_attributes_features(&settings.filterable_attributes_rules); | ||||
|         if features.is_facet_searchable() { | ||||
|             let locales = metadata.locales(&settings.localized_attributes_rules); | ||||
|             let hyper_normalized_value = normalize_facet_string(normalized_value, locales); | ||||
|  | ||||
|             let set = BTreeSet::from_iter(std::iter::once(normalized_value)); | ||||
| @@ -178,8 +183,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>( | ||||
|         let (field_id_bytes, bytes) = try_split_array_at(key).unwrap(); | ||||
|         let field_id = FieldId::from_be_bytes(field_id_bytes); | ||||
|  | ||||
|         let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id); | ||||
|         let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id); | ||||
|         let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else { | ||||
|             unreachable!("old metadata not found for field_id: {}", field_id) | ||||
|         }; | ||||
|         let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else { | ||||
|             unreachable!("new metadata not found for field_id: {}", field_id) | ||||
|         }; | ||||
|  | ||||
|         let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules); | ||||
|         let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules); | ||||
|  | ||||
|         let are_same_locales = old_locales == new_locales; | ||||
|         let reindex_facet_search = | ||||
| @@ -197,10 +209,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>( | ||||
|  | ||||
|         // Facet search normalization | ||||
|         if settings_diff.new.facet_search { | ||||
|             let new_filterable_features = new_metadata | ||||
|                 .filterable_attributes_features(&settings_diff.new.filterable_attributes_rules); | ||||
|             let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales); | ||||
|             let old_hyper_normalized_value; | ||||
|             let old_filterable_features = old_metadata | ||||
|                 .filterable_attributes_features(&settings_diff.old.filterable_attributes_rules); | ||||
|             let old_hyper_normalized_value = if !settings_diff.old.facet_search | ||||
|                 || deladd_reader.get(DelAdd::Deletion).is_none() | ||||
|                 || !old_filterable_features.is_facet_searchable() | ||||
|             { | ||||
|                 // if the facet search is disabled in the old settings or if no facet string is deleted, | ||||
|                 // we don't need to normalize the facet string. | ||||
| @@ -215,7 +232,9 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>( | ||||
|             let set = BTreeSet::from_iter(std::iter::once(normalized_value)); | ||||
|  | ||||
|             // if the facet string is the same, we can put the deletion and addition in the same obkv. | ||||
|             if old_hyper_normalized_value == Some(&new_hyper_normalized_value) { | ||||
|             if old_hyper_normalized_value == Some(&new_hyper_normalized_value) | ||||
|                 && new_filterable_features.is_facet_searchable() | ||||
|             { | ||||
|                 // nothing to do if we delete and re-add the value. | ||||
|                 if is_same_value { | ||||
|                     continue; | ||||
| @@ -249,7 +268,9 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>( | ||||
|                 } | ||||
|  | ||||
|                 // addition | ||||
|                 if deladd_reader.get(DelAdd::Addition).is_some() { | ||||
|                 if new_filterable_features.is_facet_searchable() | ||||
|                     && deladd_reader.get(DelAdd::Addition).is_some() | ||||
|                 { | ||||
|                     // insert new value | ||||
|                     let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; | ||||
|                     buffer.clear(); | ||||
|   | ||||
| @@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( | ||||
|     let mut strings_key_buffer = Vec::new(); | ||||
|  | ||||
|     let old_faceted_fids: BTreeSet<_> = | ||||
|         settings_diff.old.faceted_fields_ids.iter().copied().collect(); | ||||
|         settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion); | ||||
|     let new_faceted_fids: BTreeSet<_> = | ||||
|         settings_diff.new.faceted_fields_ids.iter().copied().collect(); | ||||
|         settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition); | ||||
|  | ||||
|     if !settings_diff.settings_update_only || settings_diff.reindex_facets() { | ||||
|         let mut cursor = obkv_documents.into_cursor()?; | ||||
|   | ||||
| @@ -15,8 +15,9 @@ use serde_json::Value; | ||||
| use super::helpers::{create_writer, writer_into_reader, GrenadParameters}; | ||||
| use crate::constants::RESERVED_VECTORS_FIELD_NAME; | ||||
| use crate::error::FaultSource; | ||||
| use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||
| use crate::index::IndexEmbeddingConfig; | ||||
| use crate::prompt::{FieldsIdsMapWithMetadata, Prompt}; | ||||
| use crate::prompt::Prompt; | ||||
| use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; | ||||
| use crate::update::settings::InnerIndexSettingsDiff; | ||||
| use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; | ||||
| @@ -190,12 +191,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|     let reindex_vectors = settings_diff.reindex_vectors(); | ||||
|  | ||||
|     let old_fields_ids_map = &settings_diff.old.fields_ids_map; | ||||
|     let old_fields_ids_map = | ||||
|         FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids); | ||||
|  | ||||
|     let new_fields_ids_map = &settings_diff.new.fields_ids_map; | ||||
|     let new_fields_ids_map = | ||||
|         FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids); | ||||
|  | ||||
|     // the vector field id may have changed | ||||
|     let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); | ||||
| @@ -383,7 +380,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|                             ); | ||||
|                             continue; | ||||
|                         } | ||||
|                         regenerate_prompt(obkv, prompt, &new_fields_ids_map)? | ||||
|                         regenerate_prompt(obkv, prompt, new_fields_ids_map)? | ||||
|                     } | ||||
|                 }, | ||||
|                 // prompt regeneration is only triggered for existing embedders | ||||
| @@ -400,7 +397,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|                         regenerate_if_prompt_changed( | ||||
|                             obkv, | ||||
|                             (old_prompt, prompt), | ||||
|                             (&old_fields_ids_map, &new_fields_ids_map), | ||||
|                             (old_fields_ids_map, new_fields_ids_map), | ||||
|                         )? | ||||
|                     } else { | ||||
|                         // we can simply ignore user provided vectors as they are not regenerated and are | ||||
| @@ -416,7 +413,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>( | ||||
|                     prompt, | ||||
|                     (add_to_user_provided, remove_from_user_provided), | ||||
|                     (old, new), | ||||
|                     (&old_fields_ids_map, &new_fields_ids_map), | ||||
|                     (old_fields_ids_map, new_fields_ids_map), | ||||
|                     document_id, | ||||
|                     embedder_name, | ||||
|                     embedder_is_manual, | ||||
| @@ -486,10 +483,7 @@ fn extract_vector_document_diff( | ||||
|     prompt: &Prompt, | ||||
|     (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), | ||||
|     (old, new): (VectorState, VectorState), | ||||
|     (old_fields_ids_map, new_fields_ids_map): ( | ||||
|         &FieldsIdsMapWithMetadata, | ||||
|         &FieldsIdsMapWithMetadata, | ||||
|     ), | ||||
|     (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata), | ||||
|     document_id: impl Fn() -> Value, | ||||
|     embedder_name: &str, | ||||
|     embedder_is_manual: bool, | ||||
| @@ -611,10 +605,7 @@ fn extract_vector_document_diff( | ||||
| fn regenerate_if_prompt_changed( | ||||
|     obkv: &obkv::KvReader<FieldId>, | ||||
|     (old_prompt, new_prompt): (&Prompt, &Prompt), | ||||
|     (old_fields_ids_map, new_fields_ids_map): ( | ||||
|         &FieldsIdsMapWithMetadata, | ||||
|         &FieldsIdsMapWithMetadata, | ||||
|     ), | ||||
|     (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata), | ||||
| ) -> Result<VectorStateDelta> { | ||||
|     let old_prompt = old_prompt | ||||
|         .render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map) | ||||
| @@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed( | ||||
| fn regenerate_prompt( | ||||
|     obkv: &obkv::KvReader<FieldId>, | ||||
|     prompt: &Prompt, | ||||
|     new_fields_ids_map: &FieldsIdsMapWithMetadata, | ||||
|     new_fields_ids_map: &FieldIdMapWithMetadata, | ||||
| ) -> Result<VectorStateDelta> { | ||||
|     let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; | ||||
|  | ||||
|   | ||||
| @@ -26,6 +26,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk}; | ||||
| pub use self::enrich::{extract_finite_float_from_value, DocumentId}; | ||||
| pub use self::helpers::*; | ||||
| pub use self::transform::{Transform, TransformOutput}; | ||||
| use super::facet::clear_facet_levels_based_on_settings_diff; | ||||
| use super::new::StdResult; | ||||
| use crate::documents::{obkv_to_object, DocumentsBatchReader}; | ||||
| use crate::error::{Error, InternalError}; | ||||
| @@ -215,9 +216,8 @@ where | ||||
|             flattened_documents, | ||||
|         } = output; | ||||
|  | ||||
|         // update the internal facet and searchable list, | ||||
|         // update the searchable list, | ||||
|         // because they might have changed due to the nested documents flattening. | ||||
|         settings_diff.new.recompute_facets(self.wtxn, self.index)?; | ||||
|         settings_diff.new.recompute_searchables(self.wtxn, self.index)?; | ||||
|  | ||||
|         let settings_diff = Arc::new(settings_diff); | ||||
| @@ -465,6 +465,11 @@ where | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // If the settings are only being updated, we may have to clear some of the facet levels. | ||||
|                 if settings_diff.settings_update_only() { | ||||
|                     clear_facet_levels_based_on_settings_diff(self.wtxn, self.index, &settings_diff)?; | ||||
|                 } | ||||
|  | ||||
|                 Ok(()) | ||||
|             }).map_err(InternalError::from)??; | ||||
|  | ||||
| @@ -765,18 +770,19 @@ mod tests { | ||||
|     use bumpalo::Bump; | ||||
|     use fst::IntoStreamer; | ||||
|     use heed::RwTxn; | ||||
|     use maplit::hashset; | ||||
|     use maplit::{btreeset, hashset}; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::constants::RESERVED_GEO_FIELD_NAME; | ||||
|     use crate::documents::mmap_from_objects; | ||||
|     use crate::filterable_attributes_rules::filtered_matching_field_names; | ||||
|     use crate::index::tests::TempIndex; | ||||
|     use crate::index::IndexEmbeddingConfig; | ||||
|     use crate::progress::Progress; | ||||
|     use crate::search::TermsMatchingStrategy; | ||||
|     use crate::update::new::indexer; | ||||
|     use crate::update::Setting; | ||||
|     use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError}; | ||||
|     use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError}; | ||||
|  | ||||
|     #[test] | ||||
|     fn simple_document_replacement() { | ||||
| @@ -1006,7 +1012,9 @@ mod tests { | ||||
|  | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); | ||||
|                 settings.set_filterable_fields(vec![FilterableAttributesRule::Field( | ||||
|                     RESERVED_GEO_FIELD_NAME.to_string(), | ||||
|                 )]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|     } | ||||
| @@ -1018,7 +1026,9 @@ mod tests { | ||||
|  | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); | ||||
|                 settings.set_filterable_fields(vec![FilterableAttributesRule::Field( | ||||
|                     RESERVED_GEO_FIELD_NAME.to_string(), | ||||
|                 )]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -1234,15 +1244,24 @@ mod tests { | ||||
|                 let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")]; | ||||
|                 settings.set_searchable_fields(searchable_fields); | ||||
|  | ||||
|                 let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin")); | ||||
|                 let faceted_fields = vec![ | ||||
|                     FilterableAttributesRule::Field("title".to_string()), | ||||
|                     FilterableAttributesRule::Field("nested.object".to_string()), | ||||
|                     FilterableAttributesRule::Field("nested.machin".to_string()), | ||||
|                 ]; | ||||
|                 settings.set_filterable_fields(faceted_fields); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin"))); | ||||
|         let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|         let facets = | ||||
|             filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { | ||||
|                 features.is_filterable() | ||||
|             }); | ||||
|         assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin")); | ||||
|  | ||||
|         // testing the simple query search | ||||
|         let mut search = crate::Search::new(&rtxn, &index); | ||||
| @@ -1438,7 +1457,9 @@ mod tests { | ||||
|  | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_filterable_fields(hashset!(String::from("dog"))); | ||||
|                 settings.set_filterable_fields(vec![FilterableAttributesRule::Field( | ||||
|                     "dog".to_string(), | ||||
|                 )]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -1457,9 +1478,14 @@ mod tests { | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let hidden = index.faceted_fields(&rtxn).unwrap(); | ||||
|         let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|         let facets = | ||||
|             filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { | ||||
|                 features.is_filterable() | ||||
|             }); | ||||
|  | ||||
|         assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain"))); | ||||
|         assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain")); | ||||
|  | ||||
|         for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] { | ||||
|             let mut search = crate::Search::new(&rtxn, &index); | ||||
| @@ -1480,9 +1506,14 @@ mod tests { | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|         let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); | ||||
|         let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|         let facets = | ||||
|             filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { | ||||
|                 features.is_filterable() | ||||
|             }); | ||||
|  | ||||
|         assert_eq!(facets, hashset!()); | ||||
|         assert_eq!(facets, btreeset!()); | ||||
|  | ||||
|         // update the settings to test the sortable | ||||
|         index | ||||
| @@ -1506,10 +1537,6 @@ mod tests { | ||||
|  | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|  | ||||
|         assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain"))); | ||||
|  | ||||
|         let mut search = crate::Search::new(&rtxn, &index); | ||||
|         search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S( | ||||
|             "dog.race.bernese mountain", | ||||
| @@ -1717,8 +1744,13 @@ mod tests { | ||||
|  | ||||
|         let check_ok = |index: &Index| { | ||||
|             let rtxn = index.read_txn().unwrap(); | ||||
|             let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|             assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))); | ||||
|             let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|             let facets = | ||||
|                 filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { | ||||
|                     features.is_filterable() | ||||
|                 }); | ||||
|             assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue")); | ||||
|  | ||||
|             let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); | ||||
|             let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); | ||||
| @@ -1738,7 +1770,7 @@ mod tests { | ||||
|             assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]); | ||||
|         }; | ||||
|  | ||||
|         let faceted_fields = hashset!(S("colour")); | ||||
|         let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())]; | ||||
|  | ||||
|         let index = TempIndex::new(); | ||||
|         index.add_documents(content()).unwrap(); | ||||
| @@ -1823,8 +1855,13 @@ mod tests { | ||||
|  | ||||
|         let check_ok = |index: &Index| { | ||||
|             let rtxn = index.read_txn().unwrap(); | ||||
|             let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|             assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))); | ||||
|             let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|             let facets = | ||||
|                 filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { | ||||
|                     features.is_filterable() | ||||
|                 }); | ||||
|             assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue")); | ||||
|  | ||||
|             let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); | ||||
|             let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); | ||||
| @@ -1844,7 +1881,7 @@ mod tests { | ||||
|             assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]); | ||||
|         }; | ||||
|  | ||||
|         let faceted_fields = hashset!(S("colour")); | ||||
|         let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())]; | ||||
|  | ||||
|         let index = TempIndex::new(); | ||||
|         index.add_documents(content()).unwrap(); | ||||
| @@ -1887,8 +1924,13 @@ mod tests { | ||||
|  | ||||
|         let check_ok = |index: &Index| { | ||||
|             let rtxn = index.read_txn().unwrap(); | ||||
|             let facets = index.faceted_fields(&rtxn).unwrap(); | ||||
|             assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue"))); | ||||
|             let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
|             let facets = | ||||
|                 filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { | ||||
|                     features.is_filterable() | ||||
|                 }); | ||||
|             assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue")); | ||||
|  | ||||
|             let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap(); | ||||
|             let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap(); | ||||
| @@ -1907,7 +1949,7 @@ mod tests { | ||||
|             assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]); | ||||
|         }; | ||||
|  | ||||
|         let faceted_fields = hashset!(S("tags")); | ||||
|         let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())]; | ||||
|  | ||||
|         let index = TempIndex::new(); | ||||
|         index.add_documents(content()).unwrap(); | ||||
| @@ -2259,7 +2301,9 @@ mod tests { | ||||
|  | ||||
|         index | ||||
|             .update_settings(|settings| { | ||||
|                 settings.set_filterable_fields(hashset! { S("title") }); | ||||
|                 settings.set_filterable_fields(vec![FilterableAttributesRule::Field( | ||||
|                     "title".to_string(), | ||||
|                 )]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -3115,7 +3159,10 @@ mod tests { | ||||
|         index | ||||
|             .update_settings_using_wtxn(&mut wtxn, |settings| { | ||||
|                 settings.set_primary_key(S("docid")); | ||||
|                 settings.set_filterable_fields(hashset! { S("label"), S("label2") }); | ||||
|                 settings.set_filterable_fields(vec![ | ||||
|                     FilterableAttributesRule::Field("label".to_string()), | ||||
|                     FilterableAttributesRule::Field("label2".to_string()), | ||||
|                 ]); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
| @@ -3294,7 +3341,9 @@ mod tests { | ||||
|         index | ||||
|             .update_settings_using_wtxn(&mut wtxn, |settings| { | ||||
|                 settings.set_primary_key(S("id")); | ||||
|                 settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); | ||||
|                 settings.set_filterable_fields(vec![FilterableAttributesRule::Field( | ||||
|                     RESERVED_GEO_FIELD_NAME.to_string(), | ||||
|                 )]); | ||||
|                 settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); | ||||
|             }) | ||||
|             .unwrap(); | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::btree_map::Entry as BEntry; | ||||
| use std::collections::hash_map::Entry as HEntry; | ||||
| use std::collections::{BTreeMap, HashMap, HashSet}; | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::fs::File; | ||||
| use std::io::{Read, Seek}; | ||||
|  | ||||
| @@ -18,8 +18,10 @@ use super::helpers::{ | ||||
|     ObkvsMergeAdditionsAndDeletions, | ||||
| }; | ||||
| use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst}; | ||||
| use crate::attribute_patterns::PatternMatch; | ||||
| use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader}; | ||||
| use crate::error::{Error, InternalError, UserError}; | ||||
| use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
| use crate::index::{db_name, main_key}; | ||||
| use crate::update::del_add::{ | ||||
|     into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation, | ||||
| @@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep}; | ||||
| use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; | ||||
| use crate::vector::settings::WriteBackToDocuments; | ||||
| use crate::vector::ArroyWrapper; | ||||
| use crate::{ | ||||
|     is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, | ||||
| }; | ||||
| use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result}; | ||||
|  | ||||
| pub struct TransformOutput { | ||||
|     pub primary_key: String, | ||||
| @@ -52,7 +52,7 @@ pub struct TransformOutput { | ||||
| /// containing all those documents. | ||||
| pub struct Transform<'a, 'i> { | ||||
|     pub index: &'i Index, | ||||
|     fields_ids_map: FieldsIdsMap, | ||||
|     fields_ids_map: FieldIdMapWithMetadata, | ||||
|  | ||||
|     indexer_settings: &'a IndexerConfig, | ||||
|     pub index_documents_method: IndexDocumentsMethod, | ||||
| @@ -84,7 +84,7 @@ pub enum Operation { | ||||
| /// | ||||
| /// If new fields are present in the addition, they are added to the index field ids map. | ||||
| fn create_fields_mapping( | ||||
|     index_field_map: &mut FieldsIdsMap, | ||||
|     index_field_map: &mut FieldIdMapWithMetadata, | ||||
|     batch_field_map: &DocumentsBatchIndex, | ||||
| ) -> Result<HashMap<FieldId, FieldId>> { | ||||
|     batch_field_map | ||||
| @@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             true, | ||||
|         ); | ||||
|         let documents_ids = index.documents_ids(wtxn)?; | ||||
|         let fields_ids_map = index.fields_ids_map(wtxn)?; | ||||
|         let builder = MetadataBuilder::from_index(index, wtxn)?; | ||||
|         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); | ||||
|  | ||||
|         Ok(Transform { | ||||
|             index, | ||||
|             fields_ids_map: index.fields_ids_map(wtxn)?, | ||||
|             fields_ids_map, | ||||
|             indexer_settings, | ||||
|             available_documents_ids: AvailableIds::new(&documents_ids), | ||||
|             original_sorter, | ||||
| @@ -354,7 +357,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             documents_seen: documents_count, | ||||
|         }); | ||||
|  | ||||
|         self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?; | ||||
|         self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?; | ||||
|         self.index.put_primary_key(wtxn, &primary_key)?; | ||||
|         self.documents_count += documents_count; | ||||
|         // Now that we have a valid sorter that contains the user id and the obkv we | ||||
| @@ -371,7 +374,7 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|     )] | ||||
|     fn flatten_from_fields_ids_map( | ||||
|         obkv: &KvReader<FieldId>, | ||||
|         fields_ids_map: &mut FieldsIdsMap, | ||||
|         fields_ids_map: &mut FieldIdMapWithMetadata, | ||||
|     ) -> Result<Option<Vec<u8>>> { | ||||
|         if obkv | ||||
|             .iter() | ||||
| @@ -657,7 +660,6 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|     fn rebind_existing_document( | ||||
|         old_obkv: &KvReader<FieldId>, | ||||
|         settings_diff: &InnerIndexSettingsDiff, | ||||
|         modified_faceted_fields: &HashSet<String>, | ||||
|         mut injected_vectors: serde_json::Map<String, serde_json::Value>, | ||||
|         old_vectors_fid: Option<FieldId>, | ||||
|         original_obkv_buffer: Option<&mut Vec<u8>>, | ||||
| @@ -667,23 +669,26 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|         let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; | ||||
|  | ||||
|         // If only a faceted field has been added, keep only this field. | ||||
|         let global_facet_settings_changed = settings_diff.global_facet_settings_changed(); | ||||
|         let facet_fids_changed = settings_diff.facet_fids_changed(); | ||||
|         let necessary_faceted_field = | ||||
|             |id: FieldId| -> bool { | ||||
|  | ||||
|         let necessary_faceted_field = |id: FieldId| -> Option<DelAddOperation> { | ||||
|             if facet_fids_changed { | ||||
|                 let field_name = settings_diff.new.fields_ids_map.name(id).unwrap(); | ||||
|                 if global_facet_settings_changed { | ||||
|                     settings_diff.new.user_defined_faceted_fields.iter().any(|long| { | ||||
|                         is_faceted_by(long, field_name) || is_faceted_by(field_name, long) | ||||
|                     }) | ||||
|                 } else if facet_fids_changed { | ||||
|                     modified_faceted_fields.iter().any(|long| { | ||||
|                         is_faceted_by(long, field_name) || is_faceted_by(field_name, long) | ||||
|                     }) | ||||
|                 } else { | ||||
|                     false | ||||
|                 // if the faceted fields changed, we need to keep all the fields that are | ||||
|                 // faceted in the old or new settings. | ||||
|                 match ( | ||||
|                     settings_diff.old.match_faceted_field(field_name), | ||||
|                     settings_diff.new.match_faceted_field(field_name), | ||||
|                 ) { | ||||
|                     (PatternMatch::NoMatch, PatternMatch::NoMatch) => None, | ||||
|                     (PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition), | ||||
|                     (_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion), | ||||
|                     (_, _) => Some(DelAddOperation::DeletionAndAddition), | ||||
|                 } | ||||
|             }; | ||||
|             } else { | ||||
|                 None | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         // Always provide all fields when vectors are involved because | ||||
|         // we need the fields for the prompt/templating. | ||||
| @@ -734,12 +739,22 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors { | ||||
|             if is_primary_key(id) || reindex_vectors { | ||||
|                 operations.insert(id, DelAddOperation::DeletionAndAddition); | ||||
|                 obkv_writer.insert(id, val)?; | ||||
|             } else if let Some(operation) = settings_diff.reindex_searchable_id(id) { | ||||
|                 operations.insert(id, operation); | ||||
|                 obkv_writer.insert(id, val)?; | ||||
|             } else { | ||||
|                 let facet_operation = necessary_faceted_field(id); | ||||
|                 let searchable_operation = settings_diff.reindex_searchable_id(id); | ||||
|                 let operation = facet_operation | ||||
|                     // TODO: replace `zip.map` with `zip_with` once stable | ||||
|                     .zip(searchable_operation) | ||||
|                     .map(|(op1, op2)| op1.merge(op2)) | ||||
|                     .or(facet_operation) | ||||
|                     .or(searchable_operation); | ||||
|                 if let Some(operation) = operation { | ||||
|                     operations.insert(id, operation); | ||||
|                     obkv_writer.insert(id, val)?; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         if !injected_vectors.is_empty() { | ||||
| @@ -856,7 +871,6 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|             }; | ||||
|  | ||||
|         if original_sorter.is_some() || flattened_sorter.is_some() { | ||||
|             let modified_faceted_fields = settings_diff.modified_faceted_fields(); | ||||
|             let mut original_obkv_buffer = Vec::new(); | ||||
|             let mut flattened_obkv_buffer = Vec::new(); | ||||
|             let mut document_sorter_key_buffer = Vec::new(); | ||||
| @@ -897,7 +911,6 @@ impl<'a, 'i> Transform<'a, 'i> { | ||||
|                 Self::rebind_existing_document( | ||||
|                     old_obkv, | ||||
|                     &settings_diff, | ||||
|                     &modified_faceted_fields, | ||||
|                     injected_vectors, | ||||
|                     old_vectors_fid, | ||||
|                     Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()), | ||||
|   | ||||
| @@ -365,7 +365,7 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|             let merger = builder.build(); | ||||
|  | ||||
|             let indexer = FacetsUpdate::new(index, FacetType::Number, merger, None, data_size); | ||||
|             indexer.execute(wtxn)?; | ||||
|             indexer.execute(wtxn, &settings_diff.new)?; | ||||
|             is_merged_database = true; | ||||
|         } | ||||
|         TypedChunk::FieldIdFacetStringDocids(_) => { | ||||
| @@ -401,7 +401,7 @@ pub(crate) fn write_typed_chunk_into_index( | ||||
|                 Some(normalized_facet_id_string_merger), | ||||
|                 data_size, | ||||
|             ); | ||||
|             indexer.execute(wtxn)?; | ||||
|             indexer.execute(wtxn, &settings_diff.new)?; | ||||
|             is_merged_database = true; | ||||
|         } | ||||
|         TypedChunk::FieldIdFacetExistsDocids(_) => { | ||||
|   | ||||
| @@ -6,17 +6,20 @@ use std::sync::Arc; | ||||
|  | ||||
| use charabia::{Normalize, Tokenizer, TokenizerBuilder}; | ||||
| use deserr::{DeserializeError, Deserr}; | ||||
| use itertools::{EitherOrBoth, Itertools}; | ||||
| use itertools::{merge_join_by, EitherOrBoth, Itertools}; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde::{Deserialize, Deserializer, Serialize, Serializer}; | ||||
| use time::OffsetDateTime; | ||||
|  | ||||
| use super::del_add::DelAddOperation; | ||||
| use super::del_add::{DelAdd, DelAddOperation}; | ||||
| use super::index_documents::{IndexDocumentsConfig, Transform}; | ||||
| use super::IndexerConfig; | ||||
| use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; | ||||
| use crate::attribute_patterns::PatternMatch; | ||||
| use crate::constants::RESERVED_GEO_FIELD_NAME; | ||||
| use crate::criterion::Criterion; | ||||
| use crate::error::UserError; | ||||
| use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; | ||||
| use crate::filterable_attributes_rules::match_faceted_field; | ||||
| use crate::index::{ | ||||
|     IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO, | ||||
|     DEFAULT_MIN_WORD_LEN_TWO_TYPOS, | ||||
| @@ -31,7 +34,7 @@ use crate::vector::settings::{ | ||||
|     WriteBackToDocuments, | ||||
| }; | ||||
| use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; | ||||
| use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result}; | ||||
| use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result}; | ||||
|  | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Copy)] | ||||
| pub enum Setting<T> { | ||||
| @@ -155,7 +158,7 @@ pub struct Settings<'a, 't, 'i> { | ||||
|  | ||||
|     searchable_fields: Setting<Vec<String>>, | ||||
|     displayed_fields: Setting<Vec<String>>, | ||||
|     filterable_fields: Setting<HashSet<String>>, | ||||
|     filterable_fields: Setting<Vec<FilterableAttributesRule>>, | ||||
|     sortable_fields: Setting<HashSet<String>>, | ||||
|     criteria: Setting<Vec<Criterion>>, | ||||
|     stop_words: Setting<BTreeSet<String>>, | ||||
| @@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|         self.filterable_fields = Setting::Reset; | ||||
|     } | ||||
|  | ||||
|     pub fn set_filterable_fields(&mut self, names: HashSet<String>) { | ||||
|         self.filterable_fields = Setting::Set(names); | ||||
|     pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) { | ||||
|         self.filterable_fields = Setting::Set(rules); | ||||
|     } | ||||
|  | ||||
|     pub fn set_sortable_fields(&mut self, names: HashSet<String>) { | ||||
| @@ -516,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|     } | ||||
|  | ||||
|     /// Updates the index's searchable attributes. | ||||
|     fn update_searchable(&mut self) -> Result<bool> { | ||||
|     fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> { | ||||
|         match self.searchable_fields { | ||||
|             Setting::Set(ref fields) => { | ||||
|                 // Check to see if the searchable fields changed before doing anything else | ||||
| @@ -529,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|                     return Ok(false); | ||||
|                 } | ||||
|  | ||||
|                 // Since we're updating the settings we can only add new fields at the end of the field id map | ||||
|                 let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; | ||||
|                 // fields are deduplicated, only the first occurrence is taken into account | ||||
|                 let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>(); | ||||
|  | ||||
|                 // Add all the searchable attributes to the field map, and then add the | ||||
|                 // remaining fields from the old field map to the new one | ||||
|                 for name in names.iter() { | ||||
|                     // The fields ids map won't change the field id of already present elements thus only the | ||||
|                     // new fields will be inserted. | ||||
|                     fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; | ||||
|                 } | ||||
|  | ||||
|                 self.index.put_all_searchable_fields_from_fields_ids_map( | ||||
|                     self.wtxn, | ||||
|                     &names, | ||||
|                     &fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME), | ||||
|                     &fields_ids_map, | ||||
|                 )?; | ||||
|                 self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; | ||||
|                 self.index.put_user_defined_searchable_fields(self.wtxn, &names)?; | ||||
|                 Ok(true) | ||||
|             } | ||||
|             Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?), | ||||
| @@ -760,14 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|     fn update_filterable(&mut self) -> Result<()> { | ||||
|         match self.filterable_fields { | ||||
|             Setting::Set(ref fields) => { | ||||
|                 let mut new_facets = HashSet::new(); | ||||
|                 for name in fields { | ||||
|                     new_facets.insert(name.clone()); | ||||
|                 } | ||||
|                 self.index.put_filterable_fields(self.wtxn, &new_facets)?; | ||||
|                 self.index.put_filterable_attributes_rules(self.wtxn, fields)?; | ||||
|             } | ||||
|             Setting::Reset => { | ||||
|                 self.index.delete_filterable_fields(self.wtxn)?; | ||||
|                 self.index.delete_filterable_attributes_rules(self.wtxn)?; | ||||
|             } | ||||
|             Setting::NotSet => (), | ||||
|         } | ||||
| @@ -1257,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|         self.update_separator_tokens()?; | ||||
|         self.update_dictionary()?; | ||||
|         self.update_synonyms()?; | ||||
|         self.update_searchable()?; | ||||
|         self.update_user_defined_searchable_attributes()?; | ||||
|         self.update_exact_attributes()?; | ||||
|         self.update_proximity_precision()?; | ||||
|         self.update_prefix_search()?; | ||||
| @@ -1267,7 +1250,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|         let embedding_config_updates = self.update_embedding_configs()?; | ||||
|  | ||||
|         let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; | ||||
|         new_inner_settings.recompute_facets(self.wtxn, self.index)?; | ||||
|         new_inner_settings.recompute_searchables(self.wtxn, self.index)?; | ||||
|  | ||||
|         let primary_key_id = self | ||||
|             .index | ||||
| @@ -1319,8 +1302,8 @@ impl InnerIndexSettingsDiff { | ||||
|         settings_update_only: bool, | ||||
|     ) -> Self { | ||||
|         let only_additional_fields = match ( | ||||
|             &old_settings.user_defined_searchable_fields, | ||||
|             &new_settings.user_defined_searchable_fields, | ||||
|             &old_settings.user_defined_searchable_attributes, | ||||
|             &new_settings.user_defined_searchable_attributes, | ||||
|         ) { | ||||
|             (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * | ||||
|             (Some(old), Some(new)) => { | ||||
| @@ -1342,14 +1325,14 @@ impl InnerIndexSettingsDiff { | ||||
|                 || old_settings.dictionary != new_settings.dictionary | ||||
|                 || old_settings.proximity_precision != new_settings.proximity_precision | ||||
|                 || old_settings.prefix_search != new_settings.prefix_search | ||||
|                 || old_settings.localized_searchable_fields_ids | ||||
|                     != new_settings.localized_searchable_fields_ids | ||||
|                 || old_settings.localized_attributes_rules | ||||
|                     != new_settings.localized_attributes_rules | ||||
|         }; | ||||
|  | ||||
|         let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; | ||||
|  | ||||
|         let cache_user_defined_searchables = old_settings.user_defined_searchable_fields | ||||
|             != new_settings.user_defined_searchable_fields; | ||||
|         let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes | ||||
|             != new_settings.user_defined_searchable_attributes; | ||||
|  | ||||
|         // if the user-defined searchables changed, then we need to reindex prompts. | ||||
|         if cache_user_defined_searchables { | ||||
| @@ -1432,30 +1415,70 @@ impl InnerIndexSettingsDiff { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// List the faceted fields from the inner fid map. | ||||
|     /// This is used to list the faceted fields when we are reindexing, | ||||
|     /// but it can't be used in document addition because the field id map must be exhaustive. | ||||
|     pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> { | ||||
|         let settings = match del_add { | ||||
|             DelAdd::Deletion => &self.old, | ||||
|             DelAdd::Addition => &self.new, | ||||
|         }; | ||||
|  | ||||
|         settings | ||||
|             .fields_ids_map | ||||
|             .iter_id_metadata() | ||||
|             .filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules)) | ||||
|             .map(|(id, _)| id) | ||||
|             .collect() | ||||
|     } | ||||
|  | ||||
|     pub fn facet_fids_changed(&self) -> bool { | ||||
|         let existing_fields = &self.new.existing_fields; | ||||
|         if existing_fields.iter().any(|field| field.contains('.')) { | ||||
|             return true; | ||||
|         for eob in merge_join_by( | ||||
|             self.old.fields_ids_map.iter().filter(|(_, _, metadata)| { | ||||
|                 metadata.is_faceted(&self.old.filterable_attributes_rules) | ||||
|             }), | ||||
|             self.new.fields_ids_map.iter().filter(|(_, _, metadata)| { | ||||
|                 metadata.is_faceted(&self.new.filterable_attributes_rules) | ||||
|             }), | ||||
|             |(old_fid, _, _), (new_fid, _, _)| old_fid.cmp(new_fid), | ||||
|         ) { | ||||
|             match eob { | ||||
|                 // If there is a difference, we need to reindex facet databases. | ||||
|                 EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true, | ||||
|                 // If the field is faceted in both old and new settings, we check the facet-searchable and facet level database. | ||||
|                 EitherOrBoth::Both((_, _, old_metadata), (_, _, new_metadata)) => { | ||||
|                     // Check if the field is facet-searchable in the old and new settings. | ||||
|                     // If there is a difference, we need to reindex facet-search database. | ||||
|                     let old_filterable_features = old_metadata | ||||
|                         .filterable_attributes_features(&self.old.filterable_attributes_rules); | ||||
|                     let new_filterable_features = new_metadata | ||||
|                         .filterable_attributes_features(&self.new.filterable_attributes_rules); | ||||
|                     let is_old_facet_searchable = | ||||
|                         old_filterable_features.is_facet_searchable() && self.old.facet_search; | ||||
|                     let is_new_facet_searchable = | ||||
|                         new_filterable_features.is_facet_searchable() && self.new.facet_search; | ||||
|                     if is_old_facet_searchable != is_new_facet_searchable { | ||||
|                         return true; | ||||
|                     } | ||||
|  | ||||
|                     // Check if the field needs a facet level database in the old and new settings. | ||||
|                     // If there is a difference, we need to reindex facet level databases. | ||||
|                     let old_facet_level_database = old_metadata | ||||
|                         .require_facet_level_database(&self.old.filterable_attributes_rules); | ||||
|                     let new_facet_level_database = new_metadata | ||||
|                         .require_facet_level_database(&self.new.filterable_attributes_rules); | ||||
|                     if old_facet_level_database != new_facet_level_database { | ||||
|                         return true; | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let old_faceted_fields = &self.old.user_defined_faceted_fields; | ||||
|         if old_faceted_fields.iter().any(|field| field.contains('.')) { | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         // If there are new faceted fields we indicate that we must reindex as we must | ||||
|         // index new fields as facets. It means that the distinct attribute, | ||||
|         // an Asc/Desc criterion or a filtered attribute has been added or removed. | ||||
|         let new_faceted_fields = &self.new.user_defined_faceted_fields; | ||||
|         if new_faceted_fields.iter().any(|field| field.contains('.')) { | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
|         (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields) | ||||
|         false | ||||
|     } | ||||
|  | ||||
|     pub fn global_facet_settings_changed(&self) -> bool { | ||||
|         self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids | ||||
|         self.old.localized_attributes_rules != self.new.localized_attributes_rules | ||||
|             || self.old.facet_search != self.new.facet_search | ||||
|     } | ||||
|  | ||||
| @@ -1475,10 +1498,6 @@ impl InnerIndexSettingsDiff { | ||||
|         self.old.geo_fields_ids != self.new.geo_fields_ids | ||||
|             || (!self.settings_update_only && self.new.geo_fields_ids.is_some()) | ||||
|     } | ||||
|  | ||||
|     pub fn modified_faceted_fields(&self) -> HashSet<String> { | ||||
|         &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone)] | ||||
| @@ -1486,20 +1505,17 @@ pub(crate) struct InnerIndexSettings { | ||||
|     pub stop_words: Option<fst::Set<Vec<u8>>>, | ||||
|     pub allowed_separators: Option<BTreeSet<String>>, | ||||
|     pub dictionary: Option<BTreeSet<String>>, | ||||
|     pub fields_ids_map: FieldsIdsMap, | ||||
|     pub user_defined_faceted_fields: HashSet<String>, | ||||
|     pub user_defined_searchable_fields: Option<Vec<String>>, | ||||
|     pub faceted_fields_ids: HashSet<FieldId>, | ||||
|     pub searchable_fields_ids: Vec<FieldId>, | ||||
|     pub fields_ids_map: FieldIdMapWithMetadata, | ||||
|     pub localized_attributes_rules: Vec<LocalizedAttributesRule>, | ||||
|     pub filterable_attributes_rules: Vec<FilterableAttributesRule>, | ||||
|     pub asc_desc_fields: HashSet<String>, | ||||
|     pub distinct_field: Option<String>, | ||||
|     pub user_defined_searchable_attributes: Option<Vec<String>>, | ||||
|     pub sortable_fields: HashSet<String>, | ||||
|     pub exact_attributes: HashSet<FieldId>, | ||||
|     pub proximity_precision: ProximityPrecision, | ||||
|     pub embedding_configs: EmbeddingConfigs, | ||||
|     pub existing_fields: HashSet<String>, | ||||
|     pub geo_fields_ids: Option<(FieldId, FieldId)>, | ||||
|     pub non_searchable_fields_ids: Vec<FieldId>, | ||||
|     pub non_faceted_fields_ids: Vec<FieldId>, | ||||
|     pub localized_searchable_fields_ids: LocalizedFieldIds, | ||||
|     pub localized_faceted_fields_ids: LocalizedFieldIds, | ||||
|     pub prefix_search: PrefixSearch, | ||||
|     pub facet_search: bool, | ||||
| } | ||||
| @@ -1515,12 +1531,6 @@ impl InnerIndexSettings { | ||||
|         let allowed_separators = index.allowed_separators(rtxn)?; | ||||
|         let dictionary = index.dictionary(rtxn)?; | ||||
|         let mut fields_ids_map = index.fields_ids_map(rtxn)?; | ||||
|         let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?; | ||||
|         let user_defined_searchable_fields = | ||||
|             user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect()); | ||||
|         let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?; | ||||
|         let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?; | ||||
|         let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?; | ||||
|         let exact_attributes = index.exact_attributes_ids(rtxn)?; | ||||
|         let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default(); | ||||
|         let embedding_configs = match embedding_configs { | ||||
| @@ -1529,87 +1539,57 @@ impl InnerIndexSettings { | ||||
|         }; | ||||
|         let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default(); | ||||
|         let facet_search = index.facet_search(rtxn)?; | ||||
|         let existing_fields: HashSet<_> = index | ||||
|             .field_distribution(rtxn)? | ||||
|             .into_iter() | ||||
|             .filter_map(|(field, count)| (count != 0).then_some(field)) | ||||
|             .collect(); | ||||
|         // index.fields_ids_map($a)? ==>> fields_ids_map | ||||
|         let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) { | ||||
|             Some(gfid) => { | ||||
|                 let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid); | ||||
|                 let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid); | ||||
|             Some(_) if index.is_geo_enabled(rtxn)? => { | ||||
|                 // if `_geo` is faceted then we get the `lat` and `lng` | ||||
|                 if is_sortable || is_filterable { | ||||
|                     let field_ids = fields_ids_map | ||||
|                         .insert("_geo.lat") | ||||
|                         .zip(fields_ids_map.insert("_geo.lng")) | ||||
|                         .ok_or(UserError::AttributeLimitReached)?; | ||||
|                     Some(field_ids) | ||||
|                 } else { | ||||
|                     None | ||||
|                 } | ||||
|                 let field_ids = fields_ids_map | ||||
|                     .insert("_geo.lat") | ||||
|                     .zip(fields_ids_map.insert("_geo.lng")) | ||||
|                     .ok_or(UserError::AttributeLimitReached)?; | ||||
|                 Some(field_ids) | ||||
|             } | ||||
|             None => None, | ||||
|             _ => None, | ||||
|         }; | ||||
|         let localized_attributes_rules = index.localized_attributes_rules(rtxn)?; | ||||
|         let localized_searchable_fields_ids = LocalizedFieldIds::new( | ||||
|             &localized_attributes_rules, | ||||
|             &fields_ids_map, | ||||
|             searchable_fields_ids.iter().cloned(), | ||||
|         ); | ||||
|         let localized_faceted_fields_ids = LocalizedFieldIds::new( | ||||
|             &localized_attributes_rules, | ||||
|             &fields_ids_map, | ||||
|             faceted_fields_ids.iter().cloned(), | ||||
|         ); | ||||
|  | ||||
|         let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME); | ||||
|         searchable_fields_ids.retain(|id| !vectors_fids.contains(id)); | ||||
|         faceted_fields_ids.retain(|id| !vectors_fids.contains(id)); | ||||
|         let localized_attributes_rules = | ||||
|             index.localized_attributes_rules(rtxn)?.unwrap_or_default(); | ||||
|         let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; | ||||
|         let sortable_fields = index.sortable_fields(rtxn)?; | ||||
|         let asc_desc_fields = index.asc_desc_fields(rtxn)?; | ||||
|         let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string()); | ||||
|         let user_defined_searchable_attributes = index | ||||
|             .user_defined_searchable_fields(rtxn)? | ||||
|             .map(|fields| fields.into_iter().map(|f| f.to_string()).collect()); | ||||
|         let builder = MetadataBuilder::from_index(index, rtxn)?; | ||||
|         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); | ||||
|  | ||||
|         Ok(Self { | ||||
|             stop_words, | ||||
|             allowed_separators, | ||||
|             dictionary, | ||||
|             fields_ids_map, | ||||
|             user_defined_faceted_fields, | ||||
|             user_defined_searchable_fields, | ||||
|             faceted_fields_ids, | ||||
|             searchable_fields_ids, | ||||
|             localized_attributes_rules, | ||||
|             filterable_attributes_rules, | ||||
|             asc_desc_fields, | ||||
|             distinct_field, | ||||
|             user_defined_searchable_attributes, | ||||
|             sortable_fields, | ||||
|             exact_attributes, | ||||
|             proximity_precision, | ||||
|             embedding_configs, | ||||
|             existing_fields, | ||||
|             geo_fields_ids, | ||||
|             non_searchable_fields_ids: vectors_fids.clone(), | ||||
|             non_faceted_fields_ids: vectors_fids.clone(), | ||||
|             localized_searchable_fields_ids, | ||||
|             localized_faceted_fields_ids, | ||||
|             prefix_search, | ||||
|             facet_search, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     // find and insert the new field ids | ||||
|     pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> { | ||||
|         let new_facets = self | ||||
|             .fields_ids_map | ||||
|             .iter() | ||||
|             .filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid)) | ||||
|             .filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields)) | ||||
|             .map(|(_fid, field)| field.to_string()) | ||||
|             .collect(); | ||||
|         index.put_faceted_fields(wtxn, &new_facets)?; | ||||
|  | ||||
|         self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?; | ||||
|         let localized_attributes_rules = index.localized_attributes_rules(wtxn)?; | ||||
|         self.localized_faceted_fields_ids = LocalizedFieldIds::new( | ||||
|             &localized_attributes_rules, | ||||
|             &self.fields_ids_map, | ||||
|             self.faceted_fields_ids.iter().cloned(), | ||||
|         ); | ||||
|         Ok(()) | ||||
|     pub fn match_faceted_field(&self, field: &str) -> PatternMatch { | ||||
|         match_faceted_field( | ||||
|             field, | ||||
|             &self.filterable_attributes_rules, | ||||
|             &self.sortable_fields, | ||||
|             &self.asc_desc_fields, | ||||
|             &self.distinct_field, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     // find and insert the new field ids | ||||
| @@ -1619,7 +1599,7 @@ impl InnerIndexSettings { | ||||
|         index: &Index, | ||||
|     ) -> Result<()> { | ||||
|         let searchable_fields = self | ||||
|             .user_defined_searchable_fields | ||||
|             .user_defined_searchable_attributes | ||||
|             .as_ref() | ||||
|             .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>()); | ||||
|  | ||||
| @@ -1628,17 +1608,9 @@ impl InnerIndexSettings { | ||||
|             index.put_all_searchable_fields_from_fields_ids_map( | ||||
|                 wtxn, | ||||
|                 &searchable_fields, | ||||
|                 &self.non_searchable_fields_ids, | ||||
|                 &self.fields_ids_map, | ||||
|             )?; | ||||
|         } | ||||
|         self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?; | ||||
|         let localized_attributes_rules = index.localized_attributes_rules(wtxn)?; | ||||
|         self.localized_searchable_fields_ids = LocalizedFieldIds::new( | ||||
|             &localized_attributes_rules, | ||||
|             &self.fields_ids_map, | ||||
|             self.searchable_fields_ids.iter().cloned(), | ||||
|         ); | ||||
|  | ||||
|         Ok(()) | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user