mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 04:36:32 +00:00
Refactor Settings Indexing process
**Changes:** The transform structure is now relying on FieldIdMapWithMetadata and AttributePatterns to prepare the obkv documents during a settings reindexing. The InnerIndexSettingsDiff and InnerIndexSettings structs are now relying on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the field and the databases that should be reindexed. The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata. We are now relying on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables. The searchable database computing is now relying on the FieldIdMapWithMetadata to know if a field is searchable and retrieve the locales. The facet database computing is now relying on the FieldIdMapWithMetadata to compute the facet databases, the facet-search and retrieve the locales. The facet level database computing is now relying on the FieldIdMapWithMetadata and the facet level database are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff). The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata. **Impact:** - Dump import - Settings update
This commit is contained in:
@ -26,6 +26,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
|
||||
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
|
||||
pub use self::helpers::*;
|
||||
pub use self::transform::{Transform, TransformOutput};
|
||||
use super::facet::clear_facet_levels_based_on_settings_diff;
|
||||
use super::new::StdResult;
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError};
|
||||
@ -215,9 +216,8 @@ where
|
||||
flattened_documents,
|
||||
} = output;
|
||||
|
||||
// update the internal facet and searchable list,
|
||||
// update the searchable list,
|
||||
// because they might have changed due to the nested documents flattening.
|
||||
settings_diff.new.recompute_facets(self.wtxn, self.index)?;
|
||||
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
|
||||
|
||||
let settings_diff = Arc::new(settings_diff);
|
||||
@ -465,6 +465,11 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
// If the settings are only being updated, we may have to clear some of the facet levels.
|
||||
if settings_diff.settings_update_only() {
|
||||
clear_facet_levels_based_on_settings_diff(self.wtxn, self.index, &settings_diff)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}).map_err(InternalError::from)??;
|
||||
|
||||
@ -765,18 +770,19 @@ mod tests {
|
||||
use bumpalo::Bump;
|
||||
use fst::IntoStreamer;
|
||||
use heed::RwTxn;
|
||||
use maplit::hashset;
|
||||
use maplit::{btreeset, hashset};
|
||||
|
||||
use super::*;
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::documents::mmap_from_objects;
|
||||
use crate::filterable_attributes_rules::filtered_matching_field_names;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::Progress;
|
||||
use crate::search::TermsMatchingStrategy;
|
||||
use crate::update::new::indexer;
|
||||
use crate::update::Setting;
|
||||
use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError};
|
||||
use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
|
||||
|
||||
#[test]
|
||||
fn simple_document_replacement() {
|
||||
@ -1006,7 +1012,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
@ -1018,7 +1026,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -1234,15 +1244,24 @@ mod tests {
|
||||
let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
|
||||
settings.set_searchable_fields(searchable_fields);
|
||||
|
||||
let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
|
||||
let faceted_fields = vec![
|
||||
FilterableAttributesRule::Field("title".to_string()),
|
||||
FilterableAttributesRule::Field("nested.object".to_string()),
|
||||
FilterableAttributesRule::Field("nested.machin".to_string()),
|
||||
];
|
||||
settings.set_filterable_fields(faceted_fields);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));
|
||||
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin"));
|
||||
|
||||
// testing the simple query search
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
@ -1438,7 +1457,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset!(String::from("dog")));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
"dog".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -1457,9 +1478,14 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let hidden = index.faceted_fields(&rtxn).unwrap();
|
||||
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
|
||||
assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));
|
||||
assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain"));
|
||||
|
||||
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
@ -1480,9 +1506,14 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
|
||||
assert_eq!(facets, hashset!());
|
||||
assert_eq!(facets, btreeset!());
|
||||
|
||||
// update the settings to test the sortable
|
||||
index
|
||||
@ -1506,10 +1537,6 @@ mod tests {
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
|
||||
assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));
|
||||
|
||||
let mut search = crate::Search::new(&rtxn, &index);
|
||||
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
|
||||
"dog.race.bernese mountain",
|
||||
@ -1717,8 +1744,13 @@ mod tests {
|
||||
|
||||
let check_ok = |index: &Index| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
|
||||
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
|
||||
|
||||
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
|
||||
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
|
||||
@ -1738,7 +1770,7 @@ mod tests {
|
||||
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]);
|
||||
};
|
||||
|
||||
let faceted_fields = hashset!(S("colour"));
|
||||
let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];
|
||||
|
||||
let index = TempIndex::new();
|
||||
index.add_documents(content()).unwrap();
|
||||
@ -1823,8 +1855,13 @@ mod tests {
|
||||
|
||||
let check_ok = |index: &Index| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
|
||||
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue"));
|
||||
|
||||
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
|
||||
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
|
||||
@ -1844,7 +1881,7 @@ mod tests {
|
||||
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
|
||||
};
|
||||
|
||||
let faceted_fields = hashset!(S("colour"));
|
||||
let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];
|
||||
|
||||
let index = TempIndex::new();
|
||||
index.add_documents(content()).unwrap();
|
||||
@ -1887,8 +1924,13 @@ mod tests {
|
||||
|
||||
let check_ok = |index: &Index| {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let facets = index.faceted_fields(&rtxn).unwrap();
|
||||
assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
|
||||
let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap();
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
let facets =
|
||||
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| {
|
||||
features.is_filterable()
|
||||
});
|
||||
assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue"));
|
||||
|
||||
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
|
||||
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
|
||||
@ -1907,7 +1949,7 @@ mod tests {
|
||||
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
|
||||
};
|
||||
|
||||
let faceted_fields = hashset!(S("tags"));
|
||||
let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())];
|
||||
|
||||
let index = TempIndex::new();
|
||||
index.add_documents(content()).unwrap();
|
||||
@ -2259,7 +2301,9 @@ mod tests {
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_filterable_fields(hashset! { S("title") });
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
"title".to_string(),
|
||||
)]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
@ -3115,7 +3159,10 @@ mod tests {
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
settings.set_primary_key(S("docid"));
|
||||
settings.set_filterable_fields(hashset! { S("label"), S("label2") });
|
||||
settings.set_filterable_fields(vec![
|
||||
FilterableAttributesRule::Field("label".to_string()),
|
||||
FilterableAttributesRule::Field("label2".to_string()),
|
||||
]);
|
||||
})
|
||||
.unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
@ -3294,7 +3341,9 @@ mod tests {
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
settings.set_primary_key(S("id"));
|
||||
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
|
||||
RESERVED_GEO_FIELD_NAME.to_string(),
|
||||
)]);
|
||||
settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
|
||||
})
|
||||
.unwrap();
|
||||
|
Reference in New Issue
Block a user