mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Refactor Settings Indexing process
**Changes:** The transform structure now relies on FieldIdMapWithMetadata and AttributePatterns to prepare the obkv documents during a settings reindexing. The InnerIndexSettingsDiff and InnerIndexSettings structs now rely on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the fields and the databases that should be reindexed. The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata. We now rely on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables. The searchable database computing now relies on the FieldIdMapWithMetadata to know if a field is searchable and to retrieve the locales. The facet database computing now relies on the FieldIdMapWithMetadata to compute the facet databases and the facet-search, and to retrieve the locales. The facet level database computing now relies on the FieldIdMapWithMetadata, and the facet level databases are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff). The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata. **Impact:** - Dump import - Settings update
This commit is contained in:
		@@ -81,6 +81,17 @@ pub enum DelAddOperation {
 | 
			
		||||
    DeletionAndAddition,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl DelAddOperation {
 | 
			
		||||
    /// Merge two DelAddOperation enum variants.
 | 
			
		||||
    pub fn merge(self, other: Self) -> Self {
 | 
			
		||||
        match (self, other) {
 | 
			
		||||
            (Self::Deletion, Self::Deletion) => Self::Deletion,
 | 
			
		||||
            (Self::Addition, Self::Addition) => Self::Addition,
 | 
			
		||||
            _ => Self::DeletionAndAddition,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value>
 | 
			
		||||
///
 | 
			
		||||
/// putting each deletion obkv's keys under a DelAdd::Deletion
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ use heed::types::Bytes;
 | 
			
		||||
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
 | 
			
		||||
use super::{clear_facet_levels, FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
 | 
			
		||||
use crate::facet::FacetType;
 | 
			
		||||
use crate::heed_codec::facet::{
 | 
			
		||||
    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 | 
			
		||||
@@ -97,9 +97,7 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
 | 
			
		||||
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 | 
			
		||||
    pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> {
 | 
			
		||||
        self.update_level0(wtxn)?;
 | 
			
		||||
        for &field_id in field_ids.iter() {
 | 
			
		||||
            self.clear_levels(wtxn, field_id)?;
 | 
			
		||||
        }
 | 
			
		||||
        clear_facet_levels(wtxn, &self.db.remap_data_type(), field_ids)?;
 | 
			
		||||
 | 
			
		||||
        for &field_id in field_ids.iter() {
 | 
			
		||||
            let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
 | 
			
		||||
@@ -114,14 +112,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> {
 | 
			
		||||
        let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
 | 
			
		||||
        let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
 | 
			
		||||
        let range = left..=right;
 | 
			
		||||
        self.db.delete_range(wtxn, &range).map(drop)?;
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> {
 | 
			
		||||
        let delta_data = match self.delta_data.take() {
 | 
			
		||||
            Some(x) => x,
 | 
			
		||||
@@ -365,8 +355,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
 | 
			
		||||
mod tests {
 | 
			
		||||
    use std::iter::once;
 | 
			
		||||
 | 
			
		||||
    use big_s::S;
 | 
			
		||||
    use maplit::hashset;
 | 
			
		||||
    use roaring::RoaringBitmap;
 | 
			
		||||
 | 
			
		||||
    use crate::documents::mmap_from_objects;
 | 
			
		||||
@@ -374,7 +362,7 @@ mod tests {
 | 
			
		||||
    use crate::heed_codec::StrRefCodec;
 | 
			
		||||
    use crate::index::tests::TempIndex;
 | 
			
		||||
    use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
 | 
			
		||||
    use crate::{db_snap, milli_snap};
 | 
			
		||||
    use crate::{db_snap, milli_snap, FilterableAttributesRule};
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn insert() {
 | 
			
		||||
@@ -474,7 +462,8 @@ mod tests {
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings(|settings| {
 | 
			
		||||
                settings.set_primary_key("id".to_owned());
 | 
			
		||||
                settings.set_filterable_fields(hashset! { S("id") });
 | 
			
		||||
                settings
 | 
			
		||||
                    .set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_string())]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -89,6 +89,7 @@ use time::OffsetDateTime;
 | 
			
		||||
use tracing::debug;
 | 
			
		||||
 | 
			
		||||
use self::incremental::FacetsUpdateIncremental;
 | 
			
		||||
use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 | 
			
		||||
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
 | 
			
		||||
use crate::facet::FacetType;
 | 
			
		||||
use crate::heed_codec::facet::{
 | 
			
		||||
@@ -147,7 +148,11 @@ impl<'i> FacetsUpdate<'i> {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> {
 | 
			
		||||
    pub fn execute(
 | 
			
		||||
        self,
 | 
			
		||||
        wtxn: &mut heed::RwTxn<'_>,
 | 
			
		||||
        new_settings: &InnerIndexSettings,
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        if self.data_size == 0 {
 | 
			
		||||
            return Ok(());
 | 
			
		||||
        }
 | 
			
		||||
@@ -156,8 +161,7 @@ impl<'i> FacetsUpdate<'i> {
 | 
			
		||||
 | 
			
		||||
        // See self::comparison_bench::benchmark_facet_indexing
 | 
			
		||||
        if self.data_size >= (self.database.len(wtxn)? / 500) {
 | 
			
		||||
            let field_ids =
 | 
			
		||||
                self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
 | 
			
		||||
            let field_ids = facet_levels_field_ids(new_settings);
 | 
			
		||||
            let bulk_update = FacetsUpdateBulk::new(
 | 
			
		||||
                self.index,
 | 
			
		||||
                field_ids,
 | 
			
		||||
@@ -291,6 +295,53 @@ fn index_facet_search(
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Clear all the levels greater than 0 for given field ids.
 | 
			
		||||
pub fn clear_facet_levels<'a, I>(
 | 
			
		||||
    wtxn: &mut heed::RwTxn<'_>,
 | 
			
		||||
    db: &heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DecodeIgnore>,
 | 
			
		||||
    field_ids: I,
 | 
			
		||||
) -> Result<()>
 | 
			
		||||
where
 | 
			
		||||
    I: IntoIterator<Item = &'a FieldId>,
 | 
			
		||||
{
 | 
			
		||||
    for field_id in field_ids {
 | 
			
		||||
        let field_id = *field_id;
 | 
			
		||||
        let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
 | 
			
		||||
        let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
 | 
			
		||||
        let range = left..=right;
 | 
			
		||||
        db.delete_range(wtxn, &range).map(drop)?;
 | 
			
		||||
    }
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn clear_facet_levels_based_on_settings_diff(
 | 
			
		||||
    wtxn: &mut heed::RwTxn<'_>,
 | 
			
		||||
    index: &Index,
 | 
			
		||||
    settings_diff: &InnerIndexSettingsDiff,
 | 
			
		||||
) -> Result<()> {
 | 
			
		||||
    let new_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.new);
 | 
			
		||||
    let old_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.old);
 | 
			
		||||
 | 
			
		||||
    let field_ids_to_clear: Vec<_> = old_field_ids.difference(&new_field_ids).copied().collect();
 | 
			
		||||
    clear_facet_levels(wtxn, &index.facet_id_string_docids.remap_types(), &field_ids_to_clear)?;
 | 
			
		||||
    clear_facet_levels(wtxn, &index.facet_id_f64_docids.remap_types(), &field_ids_to_clear)?;
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn facet_levels_field_ids<B>(settings: &InnerIndexSettings) -> B
 | 
			
		||||
where
 | 
			
		||||
    B: FromIterator<FieldId>,
 | 
			
		||||
{
 | 
			
		||||
    settings
 | 
			
		||||
        .fields_ids_map
 | 
			
		||||
        .iter_id_metadata()
 | 
			
		||||
        .filter(|(_, metadata)| {
 | 
			
		||||
            metadata.require_facet_level_database(&settings.filterable_attributes_rules)
 | 
			
		||||
        })
 | 
			
		||||
        .map(|(id, _)| id)
 | 
			
		||||
        .collect()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
pub(crate) mod test_helpers {
 | 
			
		||||
    use std::cell::Cell;
 | 
			
		||||
 
 | 
			
		||||
@@ -95,12 +95,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
 | 
			
		||||
    // If the settings specifies that a _geo field must be used therefore we must check the
 | 
			
		||||
    // validity of it in all the documents of this batch and this is when we return `Some`.
 | 
			
		||||
    let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) {
 | 
			
		||||
        Some(geo_field_id)
 | 
			
		||||
            if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME)
 | 
			
		||||
                || index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) =>
 | 
			
		||||
        {
 | 
			
		||||
            Some(geo_field_id)
 | 
			
		||||
        }
 | 
			
		||||
        Some(geo_field_id) if index.is_geo_enabled(rtxn)? => Some(geo_field_id),
 | 
			
		||||
        _otherwise => None,
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -150,9 +150,14 @@ fn searchable_fields_changed(
 | 
			
		||||
    obkv: &KvReader<FieldId>,
 | 
			
		||||
    settings_diff: &InnerIndexSettingsDiff,
 | 
			
		||||
) -> bool {
 | 
			
		||||
    let searchable_fields = &settings_diff.new.searchable_fields_ids;
 | 
			
		||||
    for (field_id, field_bytes) in obkv.iter() {
 | 
			
		||||
        if searchable_fields.contains(&field_id) {
 | 
			
		||||
        let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
 | 
			
		||||
            // If the field id is not in the fields ids map, skip it.
 | 
			
		||||
            // This happens for the vectors sub-fields. for example:
 | 
			
		||||
            // "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered.
 | 
			
		||||
            continue;
 | 
			
		||||
        };
 | 
			
		||||
        if metadata.is_searchable() {
 | 
			
		||||
            let del_add = KvReaderDelAdd::from_slice(field_bytes);
 | 
			
		||||
            match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
 | 
			
		||||
                // if both fields are None, check the next field.
 | 
			
		||||
@@ -200,8 +205,14 @@ fn tokens_from_document<'a>(
 | 
			
		||||
    buffers.obkv_buffer.clear();
 | 
			
		||||
    let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
 | 
			
		||||
    for (field_id, field_bytes) in obkv.iter() {
 | 
			
		||||
        let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
 | 
			
		||||
            // If the field id is not in the fields ids map, skip it.
 | 
			
		||||
            // This happens for the vectors sub-fields. for example:
 | 
			
		||||
            // "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered.
 | 
			
		||||
            continue;
 | 
			
		||||
        };
 | 
			
		||||
        // if field is searchable.
 | 
			
		||||
        if settings.searchable_fields_ids.contains(&field_id) {
 | 
			
		||||
        if metadata.is_searchable() {
 | 
			
		||||
            // extract deletion or addition only.
 | 
			
		||||
            if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
 | 
			
		||||
                // parse json.
 | 
			
		||||
@@ -216,7 +227,7 @@ fn tokens_from_document<'a>(
 | 
			
		||||
                buffers.field_buffer.clear();
 | 
			
		||||
                if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
 | 
			
		||||
                    // create an iterator of token with their positions.
 | 
			
		||||
                    let locales = settings.localized_searchable_fields_ids.locales(field_id);
 | 
			
		||||
                    let locales = metadata.locales(&settings.localized_attributes_rules);
 | 
			
		||||
                    let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
 | 
			
		||||
                        .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -12,12 +12,11 @@ use heed::BytesEncode;
 | 
			
		||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
 | 
			
		||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
 | 
			
		||||
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
 | 
			
		||||
use crate::localized_attributes_rules::LocalizedFieldIds;
 | 
			
		||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 | 
			
		||||
use crate::update::index_documents::helpers::{
 | 
			
		||||
    MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
 | 
			
		||||
};
 | 
			
		||||
use crate::update::settings::InnerIndexSettingsDiff;
 | 
			
		||||
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 | 
			
		||||
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 | 
			
		||||
 | 
			
		||||
/// Extracts the facet string and the documents ids where this facet string appear.
 | 
			
		||||
@@ -33,13 +32,10 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 | 
			
		||||
    if settings_diff.settings_update_only() {
 | 
			
		||||
        extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
 | 
			
		||||
    } else {
 | 
			
		||||
        let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
 | 
			
		||||
        let facet_search = settings_diff.new.facet_search;
 | 
			
		||||
        extract_facet_string_docids_document_update(
 | 
			
		||||
            docid_fid_facet_string,
 | 
			
		||||
            indexer,
 | 
			
		||||
            localized_field_ids,
 | 
			
		||||
            facet_search,
 | 
			
		||||
            &settings_diff.new,
 | 
			
		||||
        )
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -52,8 +48,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
 | 
			
		||||
fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
 | 
			
		||||
    docid_fid_facet_string: grenad::Reader<R>,
 | 
			
		||||
    indexer: GrenadParameters,
 | 
			
		||||
    localized_field_ids: &LocalizedFieldIds,
 | 
			
		||||
    facet_search: bool,
 | 
			
		||||
    settings: &InnerIndexSettings,
 | 
			
		||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
 | 
			
		||||
    let max_memory = indexer.max_memory_by_thread();
 | 
			
		||||
 | 
			
		||||
@@ -92,6 +87,14 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
 | 
			
		||||
        let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
 | 
			
		||||
        let field_id = FieldId::from_be_bytes(field_id_bytes);
 | 
			
		||||
 | 
			
		||||
        let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
 | 
			
		||||
            unreachable!("metadata not found for field_id: {}", field_id)
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        if !metadata.is_faceted(&settings.filterable_attributes_rules) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let (document_id_bytes, normalized_value_bytes) =
 | 
			
		||||
            try_split_array_at::<_, 4>(bytes).unwrap();
 | 
			
		||||
        let document_id = u32::from_be_bytes(document_id_bytes);
 | 
			
		||||
@@ -99,8 +102,10 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
 | 
			
		||||
        let normalized_value = str::from_utf8(normalized_value_bytes)?;
 | 
			
		||||
 | 
			
		||||
        // Facet search normalization
 | 
			
		||||
        if facet_search {
 | 
			
		||||
            let locales = localized_field_ids.locales(field_id);
 | 
			
		||||
        let features =
 | 
			
		||||
            metadata.filterable_attributes_features(&settings.filterable_attributes_rules);
 | 
			
		||||
        if features.is_facet_searchable() {
 | 
			
		||||
            let locales = metadata.locales(&settings.localized_attributes_rules);
 | 
			
		||||
            let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
 | 
			
		||||
 | 
			
		||||
            let set = BTreeSet::from_iter(std::iter::once(normalized_value));
 | 
			
		||||
@@ -178,8 +183,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
 | 
			
		||||
        let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
 | 
			
		||||
        let field_id = FieldId::from_be_bytes(field_id_bytes);
 | 
			
		||||
 | 
			
		||||
        let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
 | 
			
		||||
        let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
 | 
			
		||||
        let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else {
 | 
			
		||||
            unreachable!("old metadata not found for field_id: {}", field_id)
 | 
			
		||||
        };
 | 
			
		||||
        let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
 | 
			
		||||
            unreachable!("new metadata not found for field_id: {}", field_id)
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules);
 | 
			
		||||
        let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules);
 | 
			
		||||
 | 
			
		||||
        let are_same_locales = old_locales == new_locales;
 | 
			
		||||
        let reindex_facet_search =
 | 
			
		||||
@@ -197,10 +209,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
 | 
			
		||||
 | 
			
		||||
        // Facet search normalization
 | 
			
		||||
        if settings_diff.new.facet_search {
 | 
			
		||||
            let new_filterable_features = new_metadata
 | 
			
		||||
                .filterable_attributes_features(&settings_diff.new.filterable_attributes_rules);
 | 
			
		||||
            let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
 | 
			
		||||
            let old_hyper_normalized_value;
 | 
			
		||||
            let old_filterable_features = old_metadata
 | 
			
		||||
                .filterable_attributes_features(&settings_diff.old.filterable_attributes_rules);
 | 
			
		||||
            let old_hyper_normalized_value = if !settings_diff.old.facet_search
 | 
			
		||||
                || deladd_reader.get(DelAdd::Deletion).is_none()
 | 
			
		||||
                || !old_filterable_features.is_facet_searchable()
 | 
			
		||||
            {
 | 
			
		||||
                // if the facet search is disabled in the old settings or if no facet string is deleted,
 | 
			
		||||
                // we don't need to normalize the facet string.
 | 
			
		||||
@@ -215,7 +232,9 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
 | 
			
		||||
            let set = BTreeSet::from_iter(std::iter::once(normalized_value));
 | 
			
		||||
 | 
			
		||||
            // if the facet string is the same, we can put the deletion and addition in the same obkv.
 | 
			
		||||
            if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
 | 
			
		||||
            if old_hyper_normalized_value == Some(&new_hyper_normalized_value)
 | 
			
		||||
                && new_filterable_features.is_facet_searchable()
 | 
			
		||||
            {
 | 
			
		||||
                // nothing to do if we delete and re-add the value.
 | 
			
		||||
                if is_same_value {
 | 
			
		||||
                    continue;
 | 
			
		||||
@@ -249,7 +268,9 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // addition
 | 
			
		||||
                if deladd_reader.get(DelAdd::Addition).is_some() {
 | 
			
		||||
                if new_filterable_features.is_facet_searchable()
 | 
			
		||||
                    && deladd_reader.get(DelAdd::Addition).is_some()
 | 
			
		||||
                {
 | 
			
		||||
                    // insert new value
 | 
			
		||||
                    let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
 | 
			
		||||
                    buffer.clear();
 | 
			
		||||
 
 | 
			
		||||
@@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 | 
			
		||||
    let mut strings_key_buffer = Vec::new();
 | 
			
		||||
 | 
			
		||||
    let old_faceted_fids: BTreeSet<_> =
 | 
			
		||||
        settings_diff.old.faceted_fields_ids.iter().copied().collect();
 | 
			
		||||
        settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion);
 | 
			
		||||
    let new_faceted_fids: BTreeSet<_> =
 | 
			
		||||
        settings_diff.new.faceted_fields_ids.iter().copied().collect();
 | 
			
		||||
        settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition);
 | 
			
		||||
 | 
			
		||||
    if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
 | 
			
		||||
        let mut cursor = obkv_documents.into_cursor()?;
 | 
			
		||||
 
 | 
			
		||||
@@ -15,8 +15,9 @@ use serde_json::Value;
 | 
			
		||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 | 
			
		||||
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
 | 
			
		||||
use crate::error::FaultSource;
 | 
			
		||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
 | 
			
		||||
use crate::index::IndexEmbeddingConfig;
 | 
			
		||||
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
 | 
			
		||||
use crate::prompt::Prompt;
 | 
			
		||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 | 
			
		||||
use crate::update::settings::InnerIndexSettingsDiff;
 | 
			
		||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
 | 
			
		||||
@@ -190,12 +191,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
 | 
			
		||||
    let reindex_vectors = settings_diff.reindex_vectors();
 | 
			
		||||
 | 
			
		||||
    let old_fields_ids_map = &settings_diff.old.fields_ids_map;
 | 
			
		||||
    let old_fields_ids_map =
 | 
			
		||||
        FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
 | 
			
		||||
 | 
			
		||||
    let new_fields_ids_map = &settings_diff.new.fields_ids_map;
 | 
			
		||||
    let new_fields_ids_map =
 | 
			
		||||
        FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
 | 
			
		||||
 | 
			
		||||
    // the vector field id may have changed
 | 
			
		||||
    let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
 | 
			
		||||
@@ -383,7 +380,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
 | 
			
		||||
                            );
 | 
			
		||||
                            continue;
 | 
			
		||||
                        }
 | 
			
		||||
                        regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
 | 
			
		||||
                        regenerate_prompt(obkv, prompt, new_fields_ids_map)?
 | 
			
		||||
                    }
 | 
			
		||||
                },
 | 
			
		||||
                // prompt regeneration is only triggered for existing embedders
 | 
			
		||||
@@ -400,7 +397,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
 | 
			
		||||
                        regenerate_if_prompt_changed(
 | 
			
		||||
                            obkv,
 | 
			
		||||
                            (old_prompt, prompt),
 | 
			
		||||
                            (&old_fields_ids_map, &new_fields_ids_map),
 | 
			
		||||
                            (old_fields_ids_map, new_fields_ids_map),
 | 
			
		||||
                        )?
 | 
			
		||||
                    } else {
 | 
			
		||||
                        // we can simply ignore user provided vectors as they are not regenerated and are
 | 
			
		||||
@@ -416,7 +413,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
 | 
			
		||||
                    prompt,
 | 
			
		||||
                    (add_to_user_provided, remove_from_user_provided),
 | 
			
		||||
                    (old, new),
 | 
			
		||||
                    (&old_fields_ids_map, &new_fields_ids_map),
 | 
			
		||||
                    (old_fields_ids_map, new_fields_ids_map),
 | 
			
		||||
                    document_id,
 | 
			
		||||
                    embedder_name,
 | 
			
		||||
                    embedder_is_manual,
 | 
			
		||||
@@ -486,10 +483,7 @@ fn extract_vector_document_diff(
 | 
			
		||||
    prompt: &Prompt,
 | 
			
		||||
    (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
 | 
			
		||||
    (old, new): (VectorState, VectorState),
 | 
			
		||||
    (old_fields_ids_map, new_fields_ids_map): (
 | 
			
		||||
        &FieldsIdsMapWithMetadata,
 | 
			
		||||
        &FieldsIdsMapWithMetadata,
 | 
			
		||||
    ),
 | 
			
		||||
    (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
 | 
			
		||||
    document_id: impl Fn() -> Value,
 | 
			
		||||
    embedder_name: &str,
 | 
			
		||||
    embedder_is_manual: bool,
 | 
			
		||||
@@ -611,10 +605,7 @@ fn extract_vector_document_diff(
 | 
			
		||||
fn regenerate_if_prompt_changed(
 | 
			
		||||
    obkv: &obkv::KvReader<FieldId>,
 | 
			
		||||
    (old_prompt, new_prompt): (&Prompt, &Prompt),
 | 
			
		||||
    (old_fields_ids_map, new_fields_ids_map): (
 | 
			
		||||
        &FieldsIdsMapWithMetadata,
 | 
			
		||||
        &FieldsIdsMapWithMetadata,
 | 
			
		||||
    ),
 | 
			
		||||
    (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
 | 
			
		||||
) -> Result<VectorStateDelta> {
 | 
			
		||||
    let old_prompt = old_prompt
 | 
			
		||||
        .render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
 | 
			
		||||
@@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed(
 | 
			
		||||
fn regenerate_prompt(
 | 
			
		||||
    obkv: &obkv::KvReader<FieldId>,
 | 
			
		||||
    prompt: &Prompt,
 | 
			
		||||
    new_fields_ids_map: &FieldsIdsMapWithMetadata,
 | 
			
		||||
    new_fields_ids_map: &FieldIdMapWithMetadata,
 | 
			
		||||
) -> Result<VectorStateDelta> {
 | 
			
		||||
    let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -26,6 +26,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
 | 
			
		||||
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
 | 
			
		||||
pub use self::helpers::*;
 | 
			
		||||
pub use self::transform::{Transform, TransformOutput};
 | 
			
		||||
use super::facet::clear_facet_levels_based_on_settings_diff;
 | 
			
		||||
use super::new::StdResult;
 | 
			
		||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
 | 
			
		||||
use crate::error::{Error, InternalError};
 | 
			
		||||
@@ -215,9 +216,8 @@ where
 | 
			
		||||
            flattened_documents,
 | 
			
		||||
        } = output;
 | 
			
		||||
 | 
			
		||||
        // update the internal facet and searchable list,
 | 
			
		||||
        // update the searchable list,
 | 
			
		||||
        // because they might have changed due to the nested documents flattening.
 | 
			
		||||
        settings_diff.new.recompute_facets(self.wtxn, self.index)?;
 | 
			
		||||
        settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
 | 
			
		||||
 | 
			
		||||
        let settings_diff = Arc::new(settings_diff);
 | 
			
		||||
@@ -465,6 +465,11 @@ where
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // If the settings are only being updated, we may have to clear some of the facet levels.
 | 
			
		||||
                if settings_diff.settings_update_only() {
 | 
			
		||||
                    clear_facet_levels_based_on_settings_diff(self.wtxn, self.index, &settings_diff)?;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                Ok(())
 | 
			
		||||
            }).map_err(InternalError::from)??;
 | 
			
		||||
 | 
			
		||||
@@ -765,18 +770,19 @@ mod tests {
 | 
			
		||||
    use bumpalo::Bump;
 | 
			
		||||
    use fst::IntoStreamer;
 | 
			
		||||
    use heed::RwTxn;
 | 
			
		||||
    use maplit::hashset;
 | 
			
		||||
    use maplit::{btreeset, hashset};
 | 
			
		||||
 | 
			
		||||
    use super::*;
 | 
			
		||||
    use crate::constants::RESERVED_GEO_FIELD_NAME;
 | 
			
		||||
    use crate::documents::mmap_from_objects;
 | 
			
		||||
    use crate::filterable_attributes_rules::filtered_matching_field_names;
 | 
			
		||||
    use crate::index::tests::TempIndex;
 | 
			
		||||
    use crate::index::IndexEmbeddingConfig;
 | 
			
		||||
    use crate::progress::Progress;
 | 
			
		||||
    use crate::search::TermsMatchingStrategy;
 | 
			
		||||
    use crate::update::new::indexer;
 | 
			
		||||
    use crate::update::Setting;
 | 
			
		||||
    use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError};
 | 
			
		||||
    use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
 | 
			
		||||
 | 
			
		||||
    #[test]
 | 
			
		||||
    fn simple_document_replacement() {
 | 
			
		||||
@@ -1006,7 +1012,9 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings(|settings| {
 | 
			
		||||
                settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
 | 
			
		||||
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
 | 
			
		||||
                    RESERVED_GEO_FIELD_NAME.to_string(),
 | 
			
		||||
                )]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
    }
 | 
			
		||||
@@ -1018,7 +1026,9 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings(|settings| {
 | 
			
		||||
                settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
 | 
			
		||||
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
 | 
			
		||||
                    RESERVED_GEO_FIELD_NAME.to_string(),
 | 
			
		||||
                )]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
@@ -1234,15 +1244,24 @@ mod tests {
 | 
			
		||||
                let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
 | 
			
		||||
                settings.set_searchable_fields(searchable_fields);
 | 
			
		||||
 | 
			
		||||
                let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
 | 
			
		||||
                let faceted_fields = vec![
 | 
			
		||||
                    FilterableAttributesRule::Field("title".to_string()),
 | 
			
		||||
                    FilterableAttributesRule::Field("nested.object".to_string()),
 | 
			
		||||
                    FilterableAttributesRule::Field("nested.machin".to_string()),
 | 
			
		||||
                ];
 | 
			
		||||
                settings.set_filterable_fields(faceted_fields);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
        assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));
 | 
			
		||||
        let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
 | 
			
		||||
        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 | 
			
		||||
        let facets =
 | 
			
		||||
            filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
 | 
			
		||||
                features.is_filterable()
 | 
			
		||||
            });
 | 
			
		||||
        assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin"));
 | 
			
		||||
 | 
			
		||||
        // testing the simple query search
 | 
			
		||||
        let mut search = crate::Search::new(&rtxn, &index);
 | 
			
		||||
@@ -1438,7 +1457,9 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings(|settings| {
 | 
			
		||||
                settings.set_filterable_fields(hashset!(String::from("dog")));
 | 
			
		||||
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
 | 
			
		||||
                    "dog".to_string(),
 | 
			
		||||
                )]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
@@ -1457,9 +1478,14 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let hidden = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
        let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
 | 
			
		||||
        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 | 
			
		||||
        let facets =
 | 
			
		||||
            filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
 | 
			
		||||
                features.is_filterable()
 | 
			
		||||
            });
 | 
			
		||||
 | 
			
		||||
        assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));
 | 
			
		||||
        assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain"));
 | 
			
		||||
 | 
			
		||||
        for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
 | 
			
		||||
            let mut search = crate::Search::new(&rtxn, &index);
 | 
			
		||||
@@ -1480,9 +1506,14 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
        let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
 | 
			
		||||
        let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 | 
			
		||||
        let facets =
 | 
			
		||||
            filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
 | 
			
		||||
                features.is_filterable()
 | 
			
		||||
            });
 | 
			
		||||
 | 
			
		||||
        assert_eq!(facets, hashset!());
 | 
			
		||||
        assert_eq!(facets, btreeset!());
 | 
			
		||||
 | 
			
		||||
        // update the settings to test the sortable
 | 
			
		||||
        index
 | 
			
		||||
@@ -1506,10 +1537,6 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
        let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
 | 
			
		||||
        assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));
 | 
			
		||||
 | 
			
		||||
        let mut search = crate::Search::new(&rtxn, &index);
 | 
			
		||||
        search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
 | 
			
		||||
            "dog.race.bernese mountain",
 | 
			
		||||
@@ -1717,8 +1744,13 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let check_ok = |index: &Index| {
 | 
			
		||||
            let rtxn = index.read_txn().unwrap();
 | 
			
		||||
            let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
            assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
 | 
			
		||||
            let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
 | 
			
		||||
            let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 | 
			
		||||
            let facets =
 | 
			
		||||
                filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
 | 
			
		||||
                    features.is_filterable()
 | 
			
		||||
                });
 | 
			
		||||
            assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
 | 
			
		||||
 | 
			
		||||
            let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
 | 
			
		||||
            let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
 | 
			
		||||
@@ -1738,7 +1770,7 @@ mod tests {
 | 
			
		||||
            assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let faceted_fields = hashset!(S("colour"));
 | 
			
		||||
        let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];
 | 
			
		||||
 | 
			
		||||
        let index = TempIndex::new();
 | 
			
		||||
        index.add_documents(content()).unwrap();
 | 
			
		||||
@@ -1823,8 +1855,13 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let check_ok = |index: &Index| {
 | 
			
		||||
            let rtxn = index.read_txn().unwrap();
 | 
			
		||||
            let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
            assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
 | 
			
		||||
            let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
 | 
			
		||||
            let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 | 
			
		||||
            let facets =
 | 
			
		||||
                filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
 | 
			
		||||
                    features.is_filterable()
 | 
			
		||||
                });
 | 
			
		||||
            assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
 | 
			
		||||
 | 
			
		||||
            let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
 | 
			
		||||
            let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
 | 
			
		||||
@@ -1844,7 +1881,7 @@ mod tests {
 | 
			
		||||
            assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let faceted_fields = hashset!(S("colour"));
 | 
			
		||||
        let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];
 | 
			
		||||
 | 
			
		||||
        let index = TempIndex::new();
 | 
			
		||||
        index.add_documents(content()).unwrap();
 | 
			
		||||
@@ -1887,8 +1924,13 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        let check_ok = |index: &Index| {
 | 
			
		||||
            let rtxn = index.read_txn().unwrap();
 | 
			
		||||
            let facets = index.faceted_fields(&rtxn).unwrap();
 | 
			
		||||
            assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
 | 
			
		||||
            let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
 | 
			
		||||
            let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
 | 
			
		||||
            let facets =
 | 
			
		||||
                filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
 | 
			
		||||
                    features.is_filterable()
 | 
			
		||||
                });
 | 
			
		||||
            assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue"));
 | 
			
		||||
 | 
			
		||||
            let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
 | 
			
		||||
            let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
 | 
			
		||||
@@ -1907,7 +1949,7 @@ mod tests {
 | 
			
		||||
            assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let faceted_fields = hashset!(S("tags"));
 | 
			
		||||
        let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())];
 | 
			
		||||
 | 
			
		||||
        let index = TempIndex::new();
 | 
			
		||||
        index.add_documents(content()).unwrap();
 | 
			
		||||
@@ -2259,7 +2301,9 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings(|settings| {
 | 
			
		||||
                settings.set_filterable_fields(hashset! { S("title") });
 | 
			
		||||
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
 | 
			
		||||
                    "title".to_string(),
 | 
			
		||||
                )]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
@@ -3115,7 +3159,10 @@ mod tests {
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings_using_wtxn(&mut wtxn, |settings| {
 | 
			
		||||
                settings.set_primary_key(S("docid"));
 | 
			
		||||
                settings.set_filterable_fields(hashset! { S("label"), S("label2") });
 | 
			
		||||
                settings.set_filterable_fields(vec![
 | 
			
		||||
                    FilterableAttributesRule::Field("label".to_string()),
 | 
			
		||||
                    FilterableAttributesRule::Field("label2".to_string()),
 | 
			
		||||
                ]);
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
        wtxn.commit().unwrap();
 | 
			
		||||
@@ -3294,7 +3341,9 @@ mod tests {
 | 
			
		||||
        index
 | 
			
		||||
            .update_settings_using_wtxn(&mut wtxn, |settings| {
 | 
			
		||||
                settings.set_primary_key(S("id"));
 | 
			
		||||
                settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
 | 
			
		||||
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
 | 
			
		||||
                    RESERVED_GEO_FIELD_NAME.to_string(),
 | 
			
		||||
                )]);
 | 
			
		||||
                settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
use std::collections::btree_map::Entry as BEntry;
 | 
			
		||||
use std::collections::hash_map::Entry as HEntry;
 | 
			
		||||
use std::collections::{BTreeMap, HashMap, HashSet};
 | 
			
		||||
use std::collections::{BTreeMap, HashMap};
 | 
			
		||||
use std::fs::File;
 | 
			
		||||
use std::io::{Read, Seek};
 | 
			
		||||
 | 
			
		||||
@@ -18,8 +18,10 @@ use super::helpers::{
 | 
			
		||||
    ObkvsMergeAdditionsAndDeletions,
 | 
			
		||||
};
 | 
			
		||||
use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst};
 | 
			
		||||
use crate::attribute_patterns::PatternMatch;
 | 
			
		||||
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
 | 
			
		||||
use crate::error::{Error, InternalError, UserError};
 | 
			
		||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 | 
			
		||||
use crate::index::{db_name, main_key};
 | 
			
		||||
use crate::update::del_add::{
 | 
			
		||||
    into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
 | 
			
		||||
@@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep};
 | 
			
		||||
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
 | 
			
		||||
use crate::vector::settings::WriteBackToDocuments;
 | 
			
		||||
use crate::vector::ArroyWrapper;
 | 
			
		||||
use crate::{
 | 
			
		||||
    is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
 | 
			
		||||
};
 | 
			
		||||
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
 | 
			
		||||
 | 
			
		||||
pub struct TransformOutput {
 | 
			
		||||
    pub primary_key: String,
 | 
			
		||||
@@ -52,7 +52,7 @@ pub struct TransformOutput {
 | 
			
		||||
/// containing all those documents.
 | 
			
		||||
pub struct Transform<'a, 'i> {
 | 
			
		||||
    pub index: &'i Index,
 | 
			
		||||
    fields_ids_map: FieldsIdsMap,
 | 
			
		||||
    fields_ids_map: FieldIdMapWithMetadata,
 | 
			
		||||
 | 
			
		||||
    indexer_settings: &'a IndexerConfig,
 | 
			
		||||
    pub index_documents_method: IndexDocumentsMethod,
 | 
			
		||||
@@ -84,7 +84,7 @@ pub enum Operation {
 | 
			
		||||
///
 | 
			
		||||
/// If new fields are present in the addition, they are added to the index field ids map.
 | 
			
		||||
fn create_fields_mapping(
 | 
			
		||||
    index_field_map: &mut FieldsIdsMap,
 | 
			
		||||
    index_field_map: &mut FieldIdMapWithMetadata,
 | 
			
		||||
    batch_field_map: &DocumentsBatchIndex,
 | 
			
		||||
) -> Result<HashMap<FieldId, FieldId>> {
 | 
			
		||||
    batch_field_map
 | 
			
		||||
@@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
            true,
 | 
			
		||||
        );
 | 
			
		||||
        let documents_ids = index.documents_ids(wtxn)?;
 | 
			
		||||
        let fields_ids_map = index.fields_ids_map(wtxn)?;
 | 
			
		||||
        let builder = MetadataBuilder::from_index(index, wtxn)?;
 | 
			
		||||
        let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
 | 
			
		||||
 | 
			
		||||
        Ok(Transform {
 | 
			
		||||
            index,
 | 
			
		||||
            fields_ids_map: index.fields_ids_map(wtxn)?,
 | 
			
		||||
            fields_ids_map,
 | 
			
		||||
            indexer_settings,
 | 
			
		||||
            available_documents_ids: AvailableIds::new(&documents_ids),
 | 
			
		||||
            original_sorter,
 | 
			
		||||
@@ -354,7 +357,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
            documents_seen: documents_count,
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
 | 
			
		||||
        self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?;
 | 
			
		||||
        self.index.put_primary_key(wtxn, &primary_key)?;
 | 
			
		||||
        self.documents_count += documents_count;
 | 
			
		||||
        // Now that we have a valid sorter that contains the user id and the obkv we
 | 
			
		||||
@@ -371,7 +374,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
    )]
 | 
			
		||||
    fn flatten_from_fields_ids_map(
 | 
			
		||||
        obkv: &KvReader<FieldId>,
 | 
			
		||||
        fields_ids_map: &mut FieldsIdsMap,
 | 
			
		||||
        fields_ids_map: &mut FieldIdMapWithMetadata,
 | 
			
		||||
    ) -> Result<Option<Vec<u8>>> {
 | 
			
		||||
        if obkv
 | 
			
		||||
            .iter()
 | 
			
		||||
@@ -657,7 +660,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
    fn rebind_existing_document(
 | 
			
		||||
        old_obkv: &KvReader<FieldId>,
 | 
			
		||||
        settings_diff: &InnerIndexSettingsDiff,
 | 
			
		||||
        modified_faceted_fields: &HashSet<String>,
 | 
			
		||||
        mut injected_vectors: serde_json::Map<String, serde_json::Value>,
 | 
			
		||||
        old_vectors_fid: Option<FieldId>,
 | 
			
		||||
        original_obkv_buffer: Option<&mut Vec<u8>>,
 | 
			
		||||
@@ -667,23 +669,26 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
        let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
 | 
			
		||||
 | 
			
		||||
        // If only a faceted field has been added, keep only this field.
 | 
			
		||||
        let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
 | 
			
		||||
        let facet_fids_changed = settings_diff.facet_fids_changed();
 | 
			
		||||
        let necessary_faceted_field =
 | 
			
		||||
            |id: FieldId| -> bool {
 | 
			
		||||
 | 
			
		||||
        let necessary_faceted_field = |id: FieldId| -> Option<DelAddOperation> {
 | 
			
		||||
            if facet_fids_changed {
 | 
			
		||||
                let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
 | 
			
		||||
                if global_facet_settings_changed {
 | 
			
		||||
                    settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
 | 
			
		||||
                        is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
 | 
			
		||||
                    })
 | 
			
		||||
                } else if facet_fids_changed {
 | 
			
		||||
                    modified_faceted_fields.iter().any(|long| {
 | 
			
		||||
                        is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
 | 
			
		||||
                    })
 | 
			
		||||
                } else {
 | 
			
		||||
                    false
 | 
			
		||||
                // if the faceted fields changed, we need to keep all the fields that are
 | 
			
		||||
                // faceted in the old or new settings.
 | 
			
		||||
                match (
 | 
			
		||||
                    settings_diff.old.match_faceted_field(field_name),
 | 
			
		||||
                    settings_diff.new.match_faceted_field(field_name),
 | 
			
		||||
                ) {
 | 
			
		||||
                    (PatternMatch::NoMatch, PatternMatch::NoMatch) => None,
 | 
			
		||||
                    (PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition),
 | 
			
		||||
                    (_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion),
 | 
			
		||||
                    (_, _) => Some(DelAddOperation::DeletionAndAddition),
 | 
			
		||||
                }
 | 
			
		||||
            };
 | 
			
		||||
            } else {
 | 
			
		||||
                None
 | 
			
		||||
            }
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // Always provide all fields when vectors are involved because
 | 
			
		||||
        // we need the fields for the prompt/templating.
 | 
			
		||||
@@ -734,12 +739,22 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
 | 
			
		||||
            if is_primary_key(id) || reindex_vectors {
 | 
			
		||||
                operations.insert(id, DelAddOperation::DeletionAndAddition);
 | 
			
		||||
                obkv_writer.insert(id, val)?;
 | 
			
		||||
            } else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
 | 
			
		||||
                operations.insert(id, operation);
 | 
			
		||||
                obkv_writer.insert(id, val)?;
 | 
			
		||||
            } else {
 | 
			
		||||
                let facet_operation = necessary_faceted_field(id);
 | 
			
		||||
                let searchable_operation = settings_diff.reindex_searchable_id(id);
 | 
			
		||||
                let operation = facet_operation
 | 
			
		||||
                    // TODO: replace `zip.map` with `zip_with` once stable
 | 
			
		||||
                    .zip(searchable_operation)
 | 
			
		||||
                    .map(|(op1, op2)| op1.merge(op2))
 | 
			
		||||
                    .or(facet_operation)
 | 
			
		||||
                    .or(searchable_operation);
 | 
			
		||||
                if let Some(operation) = operation {
 | 
			
		||||
                    operations.insert(id, operation);
 | 
			
		||||
                    obkv_writer.insert(id, val)?;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        if !injected_vectors.is_empty() {
 | 
			
		||||
@@ -856,7 +871,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
            };
 | 
			
		||||
 | 
			
		||||
        if original_sorter.is_some() || flattened_sorter.is_some() {
 | 
			
		||||
            let modified_faceted_fields = settings_diff.modified_faceted_fields();
 | 
			
		||||
            let mut original_obkv_buffer = Vec::new();
 | 
			
		||||
            let mut flattened_obkv_buffer = Vec::new();
 | 
			
		||||
            let mut document_sorter_key_buffer = Vec::new();
 | 
			
		||||
@@ -897,7 +911,6 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
			
		||||
                Self::rebind_existing_document(
 | 
			
		||||
                    old_obkv,
 | 
			
		||||
                    &settings_diff,
 | 
			
		||||
                    &modified_faceted_fields,
 | 
			
		||||
                    injected_vectors,
 | 
			
		||||
                    old_vectors_fid,
 | 
			
		||||
                    Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()),
 | 
			
		||||
 
 | 
			
		||||
@@ -365,7 +365,7 @@ pub(crate) fn write_typed_chunk_into_index(
 | 
			
		||||
            let merger = builder.build();
 | 
			
		||||
 | 
			
		||||
            let indexer = FacetsUpdate::new(index, FacetType::Number, merger, None, data_size);
 | 
			
		||||
            indexer.execute(wtxn)?;
 | 
			
		||||
            indexer.execute(wtxn, &settings_diff.new)?;
 | 
			
		||||
            is_merged_database = true;
 | 
			
		||||
        }
 | 
			
		||||
        TypedChunk::FieldIdFacetStringDocids(_) => {
 | 
			
		||||
@@ -401,7 +401,7 @@ pub(crate) fn write_typed_chunk_into_index(
 | 
			
		||||
                Some(normalized_facet_id_string_merger),
 | 
			
		||||
                data_size,
 | 
			
		||||
            );
 | 
			
		||||
            indexer.execute(wtxn)?;
 | 
			
		||||
            indexer.execute(wtxn, &settings_diff.new)?;
 | 
			
		||||
            is_merged_database = true;
 | 
			
		||||
        }
 | 
			
		||||
        TypedChunk::FieldIdFacetExistsDocids(_) => {
 | 
			
		||||
 
 | 
			
		||||
@@ -6,17 +6,20 @@ use std::sync::Arc;
 | 
			
		||||
 | 
			
		||||
use charabia::{Normalize, Tokenizer, TokenizerBuilder};
 | 
			
		||||
use deserr::{DeserializeError, Deserr};
 | 
			
		||||
use itertools::{EitherOrBoth, Itertools};
 | 
			
		||||
use itertools::{merge_join_by, EitherOrBoth, Itertools};
 | 
			
		||||
use roaring::RoaringBitmap;
 | 
			
		||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
 | 
			
		||||
use time::OffsetDateTime;
 | 
			
		||||
 | 
			
		||||
use super::del_add::DelAddOperation;
 | 
			
		||||
use super::del_add::{DelAdd, DelAddOperation};
 | 
			
		||||
use super::index_documents::{IndexDocumentsConfig, Transform};
 | 
			
		||||
use super::IndexerConfig;
 | 
			
		||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
 | 
			
		||||
use crate::attribute_patterns::PatternMatch;
 | 
			
		||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
 | 
			
		||||
use crate::criterion::Criterion;
 | 
			
		||||
use crate::error::UserError;
 | 
			
		||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 | 
			
		||||
use crate::filterable_attributes_rules::match_faceted_field;
 | 
			
		||||
use crate::index::{
 | 
			
		||||
    IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
 | 
			
		||||
    DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
 | 
			
		||||
@@ -31,7 +34,7 @@ use crate::vector::settings::{
 | 
			
		||||
    WriteBackToDocuments,
 | 
			
		||||
};
 | 
			
		||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
 | 
			
		||||
use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
 | 
			
		||||
use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result};
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
 | 
			
		||||
pub enum Setting<T> {
 | 
			
		||||
@@ -155,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
 | 
			
		||||
 | 
			
		||||
    searchable_fields: Setting<Vec<String>>,
 | 
			
		||||
    displayed_fields: Setting<Vec<String>>,
 | 
			
		||||
    filterable_fields: Setting<HashSet<String>>,
 | 
			
		||||
    filterable_fields: Setting<Vec<FilterableAttributesRule>>,
 | 
			
		||||
    sortable_fields: Setting<HashSet<String>>,
 | 
			
		||||
    criteria: Setting<Vec<Criterion>>,
 | 
			
		||||
    stop_words: Setting<BTreeSet<String>>,
 | 
			
		||||
@@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
        self.filterable_fields = Setting::Reset;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
 | 
			
		||||
        self.filterable_fields = Setting::Set(names);
 | 
			
		||||
    pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) {
 | 
			
		||||
        self.filterable_fields = Setting::Set(rules);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
 | 
			
		||||
@@ -516,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Updates the index's searchable attributes.
 | 
			
		||||
    fn update_searchable(&mut self) -> Result<bool> {
 | 
			
		||||
    fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> {
 | 
			
		||||
        match self.searchable_fields {
 | 
			
		||||
            Setting::Set(ref fields) => {
 | 
			
		||||
                // Check to see if the searchable fields changed before doing anything else
 | 
			
		||||
@@ -529,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
                    return Ok(false);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // Since we're updating the settings we can only add new fields at the end of the field id map
 | 
			
		||||
                let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
 | 
			
		||||
                // fields are deduplicated, only the first occurrence is taken into account
 | 
			
		||||
                let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
 | 
			
		||||
 | 
			
		||||
                // Add all the searchable attributes to the field map, and then add the
 | 
			
		||||
                // remaining fields from the old field map to the new one
 | 
			
		||||
                for name in names.iter() {
 | 
			
		||||
                    // The fields ids map won't change the field id of already present elements thus only the
 | 
			
		||||
                    // new fields will be inserted.
 | 
			
		||||
                    fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                self.index.put_all_searchable_fields_from_fields_ids_map(
 | 
			
		||||
                    self.wtxn,
 | 
			
		||||
                    &names,
 | 
			
		||||
                    &fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME),
 | 
			
		||||
                    &fields_ids_map,
 | 
			
		||||
                )?;
 | 
			
		||||
                self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
 | 
			
		||||
                self.index.put_user_defined_searchable_fields(self.wtxn, &names)?;
 | 
			
		||||
                Ok(true)
 | 
			
		||||
            }
 | 
			
		||||
            Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
 | 
			
		||||
@@ -760,14 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
    fn update_filterable(&mut self) -> Result<()> {
 | 
			
		||||
        match self.filterable_fields {
 | 
			
		||||
            Setting::Set(ref fields) => {
 | 
			
		||||
                let mut new_facets = HashSet::new();
 | 
			
		||||
                for name in fields {
 | 
			
		||||
                    new_facets.insert(name.clone());
 | 
			
		||||
                }
 | 
			
		||||
                self.index.put_filterable_fields(self.wtxn, &new_facets)?;
 | 
			
		||||
                self.index.put_filterable_attributes_rules(self.wtxn, fields)?;
 | 
			
		||||
            }
 | 
			
		||||
            Setting::Reset => {
 | 
			
		||||
                self.index.delete_filterable_fields(self.wtxn)?;
 | 
			
		||||
                self.index.delete_filterable_attributes_rules(self.wtxn)?;
 | 
			
		||||
            }
 | 
			
		||||
            Setting::NotSet => (),
 | 
			
		||||
        }
 | 
			
		||||
@@ -1257,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
        self.update_separator_tokens()?;
 | 
			
		||||
        self.update_dictionary()?;
 | 
			
		||||
        self.update_synonyms()?;
 | 
			
		||||
        self.update_searchable()?;
 | 
			
		||||
        self.update_user_defined_searchable_attributes()?;
 | 
			
		||||
        self.update_exact_attributes()?;
 | 
			
		||||
        self.update_proximity_precision()?;
 | 
			
		||||
        self.update_prefix_search()?;
 | 
			
		||||
@@ -1267,7 +1250,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
        let embedding_config_updates = self.update_embedding_configs()?;
 | 
			
		||||
 | 
			
		||||
        let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
 | 
			
		||||
        new_inner_settings.recompute_facets(self.wtxn, self.index)?;
 | 
			
		||||
        new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
 | 
			
		||||
 | 
			
		||||
        let primary_key_id = self
 | 
			
		||||
            .index
 | 
			
		||||
@@ -1319,8 +1302,8 @@ impl InnerIndexSettingsDiff {
 | 
			
		||||
        settings_update_only: bool,
 | 
			
		||||
    ) -> Self {
 | 
			
		||||
        let only_additional_fields = match (
 | 
			
		||||
            &old_settings.user_defined_searchable_fields,
 | 
			
		||||
            &new_settings.user_defined_searchable_fields,
 | 
			
		||||
            &old_settings.user_defined_searchable_attributes,
 | 
			
		||||
            &new_settings.user_defined_searchable_attributes,
 | 
			
		||||
        ) {
 | 
			
		||||
            (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
 | 
			
		||||
            (Some(old), Some(new)) => {
 | 
			
		||||
@@ -1342,14 +1325,14 @@ impl InnerIndexSettingsDiff {
 | 
			
		||||
                || old_settings.dictionary != new_settings.dictionary
 | 
			
		||||
                || old_settings.proximity_precision != new_settings.proximity_precision
 | 
			
		||||
                || old_settings.prefix_search != new_settings.prefix_search
 | 
			
		||||
                || old_settings.localized_searchable_fields_ids
 | 
			
		||||
                    != new_settings.localized_searchable_fields_ids
 | 
			
		||||
                || old_settings.localized_attributes_rules
 | 
			
		||||
                    != new_settings.localized_attributes_rules
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
 | 
			
		||||
 | 
			
		||||
        let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
 | 
			
		||||
            != new_settings.user_defined_searchable_fields;
 | 
			
		||||
        let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
 | 
			
		||||
            != new_settings.user_defined_searchable_attributes;
 | 
			
		||||
 | 
			
		||||
        // if the user-defined searchables changed, then we need to reindex prompts.
 | 
			
		||||
        if cache_user_defined_searchables {
 | 
			
		||||
@@ -1432,30 +1415,70 @@ impl InnerIndexSettingsDiff {
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// List the faceted fields from the inner fid map.
 | 
			
		||||
    /// This is used to list the faceted fields when we are reindexing,
 | 
			
		||||
    /// but it can't be used in document addition because the field id map must be exhaustive.
 | 
			
		||||
    pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> {
 | 
			
		||||
        let settings = match del_add {
 | 
			
		||||
            DelAdd::Deletion => &self.old,
 | 
			
		||||
            DelAdd::Addition => &self.new,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        settings
 | 
			
		||||
            .fields_ids_map
 | 
			
		||||
            .iter_id_metadata()
 | 
			
		||||
            .filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules))
 | 
			
		||||
            .map(|(id, _)| id)
 | 
			
		||||
            .collect()
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn facet_fids_changed(&self) -> bool {
 | 
			
		||||
        let existing_fields = &self.new.existing_fields;
 | 
			
		||||
        if existing_fields.iter().any(|field| field.contains('.')) {
 | 
			
		||||
            return true;
 | 
			
		||||
        for eob in merge_join_by(
 | 
			
		||||
            self.old.fields_ids_map.iter().filter(|(_, _, metadata)| {
 | 
			
		||||
                metadata.is_faceted(&self.old.filterable_attributes_rules)
 | 
			
		||||
            }),
 | 
			
		||||
            self.new.fields_ids_map.iter().filter(|(_, _, metadata)| {
 | 
			
		||||
                metadata.is_faceted(&self.new.filterable_attributes_rules)
 | 
			
		||||
            }),
 | 
			
		||||
            |(old_fid, _, _), (new_fid, _, _)| old_fid.cmp(new_fid),
 | 
			
		||||
        ) {
 | 
			
		||||
            match eob {
 | 
			
		||||
                // If there is a difference, we need to reindex facet databases.
 | 
			
		||||
                EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true,
 | 
			
		||||
                // If the field is faceted in both old and new settings, we check the facet-searchable and facet level database.
 | 
			
		||||
                EitherOrBoth::Both((_, _, old_metadata), (_, _, new_metadata)) => {
 | 
			
		||||
                    // Check if the field is facet-searchable in the old and new settings.
 | 
			
		||||
                    // If there is a difference, we need to reindex facet-search database.
 | 
			
		||||
                    let old_filterable_features = old_metadata
 | 
			
		||||
                        .filterable_attributes_features(&self.old.filterable_attributes_rules);
 | 
			
		||||
                    let new_filterable_features = new_metadata
 | 
			
		||||
                        .filterable_attributes_features(&self.new.filterable_attributes_rules);
 | 
			
		||||
                    let is_old_facet_searchable =
 | 
			
		||||
                        old_filterable_features.is_facet_searchable() && self.old.facet_search;
 | 
			
		||||
                    let is_new_facet_searchable =
 | 
			
		||||
                        new_filterable_features.is_facet_searchable() && self.new.facet_search;
 | 
			
		||||
                    if is_old_facet_searchable != is_new_facet_searchable {
 | 
			
		||||
                        return true;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    // Check if the field needs a facet level database in the old and new settings.
 | 
			
		||||
                    // If there is a difference, we need to reindex facet level databases.
 | 
			
		||||
                    let old_facet_level_database = old_metadata
 | 
			
		||||
                        .require_facet_level_database(&self.old.filterable_attributes_rules);
 | 
			
		||||
                    let new_facet_level_database = new_metadata
 | 
			
		||||
                        .require_facet_level_database(&self.new.filterable_attributes_rules);
 | 
			
		||||
                    if old_facet_level_database != new_facet_level_database {
 | 
			
		||||
                        return true;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let old_faceted_fields = &self.old.user_defined_faceted_fields;
 | 
			
		||||
        if old_faceted_fields.iter().any(|field| field.contains('.')) {
 | 
			
		||||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // If there is new faceted fields we indicate that we must reindex as we must
 | 
			
		||||
        // index new fields as facets. It means that the distinct attribute,
 | 
			
		||||
        // an Asc/Desc criterion or a filtered attribute as be added or removed.
 | 
			
		||||
        let new_faceted_fields = &self.new.user_defined_faceted_fields;
 | 
			
		||||
        if new_faceted_fields.iter().any(|field| field.contains('.')) {
 | 
			
		||||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
 | 
			
		||||
        false
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn global_facet_settings_changed(&self) -> bool {
 | 
			
		||||
        self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
 | 
			
		||||
        self.old.localized_attributes_rules != self.new.localized_attributes_rules
 | 
			
		||||
            || self.old.facet_search != self.new.facet_search
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -1475,10 +1498,6 @@ impl InnerIndexSettingsDiff {
 | 
			
		||||
        self.old.geo_fields_ids != self.new.geo_fields_ids
 | 
			
		||||
            || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn modified_faceted_fields(&self) -> HashSet<String> {
 | 
			
		||||
        &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
@@ -1486,20 +1505,17 @@ pub(crate) struct InnerIndexSettings {
 | 
			
		||||
    pub stop_words: Option<fst::Set<Vec<u8>>>,
 | 
			
		||||
    pub allowed_separators: Option<BTreeSet<String>>,
 | 
			
		||||
    pub dictionary: Option<BTreeSet<String>>,
 | 
			
		||||
    pub fields_ids_map: FieldsIdsMap,
 | 
			
		||||
    pub user_defined_faceted_fields: HashSet<String>,
 | 
			
		||||
    pub user_defined_searchable_fields: Option<Vec<String>>,
 | 
			
		||||
    pub faceted_fields_ids: HashSet<FieldId>,
 | 
			
		||||
    pub searchable_fields_ids: Vec<FieldId>,
 | 
			
		||||
    pub fields_ids_map: FieldIdMapWithMetadata,
 | 
			
		||||
    pub localized_attributes_rules: Vec<LocalizedAttributesRule>,
 | 
			
		||||
    pub filterable_attributes_rules: Vec<FilterableAttributesRule>,
 | 
			
		||||
    pub asc_desc_fields: HashSet<String>,
 | 
			
		||||
    pub distinct_field: Option<String>,
 | 
			
		||||
    pub user_defined_searchable_attributes: Option<Vec<String>>,
 | 
			
		||||
    pub sortable_fields: HashSet<String>,
 | 
			
		||||
    pub exact_attributes: HashSet<FieldId>,
 | 
			
		||||
    pub proximity_precision: ProximityPrecision,
 | 
			
		||||
    pub embedding_configs: EmbeddingConfigs,
 | 
			
		||||
    pub existing_fields: HashSet<String>,
 | 
			
		||||
    pub geo_fields_ids: Option<(FieldId, FieldId)>,
 | 
			
		||||
    pub non_searchable_fields_ids: Vec<FieldId>,
 | 
			
		||||
    pub non_faceted_fields_ids: Vec<FieldId>,
 | 
			
		||||
    pub localized_searchable_fields_ids: LocalizedFieldIds,
 | 
			
		||||
    pub localized_faceted_fields_ids: LocalizedFieldIds,
 | 
			
		||||
    pub prefix_search: PrefixSearch,
 | 
			
		||||
    pub facet_search: bool,
 | 
			
		||||
}
 | 
			
		||||
@@ -1515,12 +1531,6 @@ impl InnerIndexSettings {
 | 
			
		||||
        let allowed_separators = index.allowed_separators(rtxn)?;
 | 
			
		||||
        let dictionary = index.dictionary(rtxn)?;
 | 
			
		||||
        let mut fields_ids_map = index.fields_ids_map(rtxn)?;
 | 
			
		||||
        let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
 | 
			
		||||
        let user_defined_searchable_fields =
 | 
			
		||||
            user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
 | 
			
		||||
        let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?;
 | 
			
		||||
        let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?;
 | 
			
		||||
        let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?;
 | 
			
		||||
        let exact_attributes = index.exact_attributes_ids(rtxn)?;
 | 
			
		||||
        let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default();
 | 
			
		||||
        let embedding_configs = match embedding_configs {
 | 
			
		||||
@@ -1529,87 +1539,57 @@ impl InnerIndexSettings {
 | 
			
		||||
        };
 | 
			
		||||
        let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
 | 
			
		||||
        let facet_search = index.facet_search(rtxn)?;
 | 
			
		||||
        let existing_fields: HashSet<_> = index
 | 
			
		||||
            .field_distribution(rtxn)?
 | 
			
		||||
            .into_iter()
 | 
			
		||||
            .filter_map(|(field, count)| (count != 0).then_some(field))
 | 
			
		||||
            .collect();
 | 
			
		||||
        // index.fields_ids_map($a)? ==>> fields_ids_map
 | 
			
		||||
        let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) {
 | 
			
		||||
            Some(gfid) => {
 | 
			
		||||
                let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
 | 
			
		||||
                let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
 | 
			
		||||
            Some(_) if index.is_geo_enabled(rtxn)? => {
 | 
			
		||||
                // if `_geo` is faceted then we get the `lat` and `lng`
 | 
			
		||||
                if is_sortable || is_filterable {
 | 
			
		||||
                    let field_ids = fields_ids_map
 | 
			
		||||
                        .insert("_geo.lat")
 | 
			
		||||
                        .zip(fields_ids_map.insert("_geo.lng"))
 | 
			
		||||
                        .ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
                    Some(field_ids)
 | 
			
		||||
                } else {
 | 
			
		||||
                    None
 | 
			
		||||
                }
 | 
			
		||||
                let field_ids = fields_ids_map
 | 
			
		||||
                    .insert("_geo.lat")
 | 
			
		||||
                    .zip(fields_ids_map.insert("_geo.lng"))
 | 
			
		||||
                    .ok_or(UserError::AttributeLimitReached)?;
 | 
			
		||||
                Some(field_ids)
 | 
			
		||||
            }
 | 
			
		||||
            None => None,
 | 
			
		||||
            _ => None,
 | 
			
		||||
        };
 | 
			
		||||
        let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
 | 
			
		||||
        let localized_searchable_fields_ids = LocalizedFieldIds::new(
 | 
			
		||||
            &localized_attributes_rules,
 | 
			
		||||
            &fields_ids_map,
 | 
			
		||||
            searchable_fields_ids.iter().cloned(),
 | 
			
		||||
        );
 | 
			
		||||
        let localized_faceted_fields_ids = LocalizedFieldIds::new(
 | 
			
		||||
            &localized_attributes_rules,
 | 
			
		||||
            &fields_ids_map,
 | 
			
		||||
            faceted_fields_ids.iter().cloned(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
 | 
			
		||||
        searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
 | 
			
		||||
        faceted_fields_ids.retain(|id| !vectors_fids.contains(id));
 | 
			
		||||
        let localized_attributes_rules =
 | 
			
		||||
            index.localized_attributes_rules(rtxn)?.unwrap_or_default();
 | 
			
		||||
        let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
 | 
			
		||||
        let sortable_fields = index.sortable_fields(rtxn)?;
 | 
			
		||||
        let asc_desc_fields = index.asc_desc_fields(rtxn)?;
 | 
			
		||||
        let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
 | 
			
		||||
        let user_defined_searchable_attributes = index
 | 
			
		||||
            .user_defined_searchable_fields(rtxn)?
 | 
			
		||||
            .map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
 | 
			
		||||
        let builder = MetadataBuilder::from_index(index, rtxn)?;
 | 
			
		||||
        let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
 | 
			
		||||
 | 
			
		||||
        Ok(Self {
 | 
			
		||||
            stop_words,
 | 
			
		||||
            allowed_separators,
 | 
			
		||||
            dictionary,
 | 
			
		||||
            fields_ids_map,
 | 
			
		||||
            user_defined_faceted_fields,
 | 
			
		||||
            user_defined_searchable_fields,
 | 
			
		||||
            faceted_fields_ids,
 | 
			
		||||
            searchable_fields_ids,
 | 
			
		||||
            localized_attributes_rules,
 | 
			
		||||
            filterable_attributes_rules,
 | 
			
		||||
            asc_desc_fields,
 | 
			
		||||
            distinct_field,
 | 
			
		||||
            user_defined_searchable_attributes,
 | 
			
		||||
            sortable_fields,
 | 
			
		||||
            exact_attributes,
 | 
			
		||||
            proximity_precision,
 | 
			
		||||
            embedding_configs,
 | 
			
		||||
            existing_fields,
 | 
			
		||||
            geo_fields_ids,
 | 
			
		||||
            non_searchable_fields_ids: vectors_fids.clone(),
 | 
			
		||||
            non_faceted_fields_ids: vectors_fids.clone(),
 | 
			
		||||
            localized_searchable_fields_ids,
 | 
			
		||||
            localized_faceted_fields_ids,
 | 
			
		||||
            prefix_search,
 | 
			
		||||
            facet_search,
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // find and insert the new field ids
 | 
			
		||||
    pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
 | 
			
		||||
        let new_facets = self
 | 
			
		||||
            .fields_ids_map
 | 
			
		||||
            .iter()
 | 
			
		||||
            .filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid))
 | 
			
		||||
            .filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields))
 | 
			
		||||
            .map(|(_fid, field)| field.to_string())
 | 
			
		||||
            .collect();
 | 
			
		||||
        index.put_faceted_fields(wtxn, &new_facets)?;
 | 
			
		||||
 | 
			
		||||
        self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
 | 
			
		||||
        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
 | 
			
		||||
        self.localized_faceted_fields_ids = LocalizedFieldIds::new(
 | 
			
		||||
            &localized_attributes_rules,
 | 
			
		||||
            &self.fields_ids_map,
 | 
			
		||||
            self.faceted_fields_ids.iter().cloned(),
 | 
			
		||||
        );
 | 
			
		||||
        Ok(())
 | 
			
		||||
    pub fn match_faceted_field(&self, field: &str) -> PatternMatch {
 | 
			
		||||
        match_faceted_field(
 | 
			
		||||
            field,
 | 
			
		||||
            &self.filterable_attributes_rules,
 | 
			
		||||
            &self.sortable_fields,
 | 
			
		||||
            &self.asc_desc_fields,
 | 
			
		||||
            &self.distinct_field,
 | 
			
		||||
        )
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // find and insert the new field ids
 | 
			
		||||
@@ -1619,7 +1599,7 @@ impl InnerIndexSettings {
 | 
			
		||||
        index: &Index,
 | 
			
		||||
    ) -> Result<()> {
 | 
			
		||||
        let searchable_fields = self
 | 
			
		||||
            .user_defined_searchable_fields
 | 
			
		||||
            .user_defined_searchable_attributes
 | 
			
		||||
            .as_ref()
 | 
			
		||||
            .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
 | 
			
		||||
 | 
			
		||||
@@ -1628,17 +1608,9 @@ impl InnerIndexSettings {
 | 
			
		||||
            index.put_all_searchable_fields_from_fields_ids_map(
 | 
			
		||||
                wtxn,
 | 
			
		||||
                &searchable_fields,
 | 
			
		||||
                &self.non_searchable_fields_ids,
 | 
			
		||||
                &self.fields_ids_map,
 | 
			
		||||
            )?;
 | 
			
		||||
        }
 | 
			
		||||
        self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
 | 
			
		||||
        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
 | 
			
		||||
        self.localized_searchable_fields_ids = LocalizedFieldIds::new(
 | 
			
		||||
            &localized_attributes_rules,
 | 
			
		||||
            &self.fields_ids_map,
 | 
			
		||||
            self.searchable_fields_ids.iter().cloned(),
 | 
			
		||||
        );
 | 
			
		||||
 | 
			
		||||
        Ok(())
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user