mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Refactor the FieldIdMapWithMetadata
**Changes:** The FieldIdMapWithMetadata structure now stores more information about fields. The metadata_for_field function computes all the needed information relying on the user provided data instead of the enriched data (searchable/sortable) which may solve an indexing bug on sortable attributes that was not matching the nested fields. The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata, so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata. The Facet distribution is now relying on the FieldIdMapWithMetadata with metadata to match is a field can be faceted. **Impact:** - searchable attributes matching - searchable attributes weight computation - sortable attributes matching - faceted fields matching - prompt computing - facet distribution
This commit is contained in:
		| @@ -105,6 +105,11 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> { | |||||||
|  |  | ||||||
|         self.local.name(id) |         self.local.name(id) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Get the metadata of a field based on its id. | ||||||
|  |     pub fn metadata(&self, id: FieldId) -> Option<Metadata> { | ||||||
|  |         self.local.metadata(id).or_else(|| self.global.read().unwrap().metadata(id)) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> { | impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> { | ||||||
|   | |||||||
| @@ -5,14 +5,29 @@ use charabia::Language; | |||||||
| use heed::RoTxn; | use heed::RoTxn; | ||||||
|  |  | ||||||
| use super::FieldsIdsMap; | use super::FieldsIdsMap; | ||||||
| use crate::{FieldId, Index, LocalizedAttributesRule, Result}; | use crate::attribute_patterns::{match_field_legacy, PatternMatch}; | ||||||
|  | use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; | ||||||
|  | use crate::{ | ||||||
|  |     is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index, | ||||||
|  |     LocalizedAttributesRule, Result, Weight, | ||||||
|  | }; | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy)] | #[derive(Debug, Clone, Copy)] | ||||||
| pub struct Metadata { | pub struct Metadata { | ||||||
|     pub searchable: bool, |     /// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable. | ||||||
|     pub filterable: bool, |     pub searchable: Option<Weight>, | ||||||
|  |     /// The field is part of the sortable attributes. | ||||||
|     pub sortable: bool, |     pub sortable: bool, | ||||||
|     localized_attributes_rule_id: Option<NonZeroU16>, |     /// The field is defined as the distinct attribute. | ||||||
|  |     pub distinct: bool, | ||||||
|  |     /// The field has been defined as asc/desc in the ranking rules. | ||||||
|  |     pub asc_desc: bool, | ||||||
|  |     /// The field is a geo field. | ||||||
|  |     pub geo: bool, | ||||||
|  |     /// The id of the localized attributes rule if the field is localized. | ||||||
|  |     pub localized_attributes_rule_id: Option<NonZeroU16>, | ||||||
|  |     /// The id of the filterable attributes rule if the field is filterable. | ||||||
|  |     pub filterable_attributes_rule_id: Option<NonZeroU16>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Debug, Clone)] | ||||||
| @@ -106,76 +121,215 @@ impl Metadata { | |||||||
|         let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap(); |         let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap(); | ||||||
|         Some(rule.locales()) |         Some(rule.locales()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn filterable_attributes<'rules>( | ||||||
|  |         &self, | ||||||
|  |         rules: &'rules [FilterableAttributesRule], | ||||||
|  |     ) -> Option<&'rules FilterableAttributesRule> { | ||||||
|  |         let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get(); | ||||||
|  |         // - 1: `filterable_attributes_rule_id` is NonZero | ||||||
|  |         let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap(); | ||||||
|  |         Some(rule) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn filterable_attributes_features( | ||||||
|  |         &self, | ||||||
|  |         rules: &[FilterableAttributesRule], | ||||||
|  |     ) -> FilterableAttributesFeatures { | ||||||
|  |         self.filterable_attributes(rules) | ||||||
|  |             .map(|rule| rule.features()) | ||||||
|  |             // if there is no filterable attributes rule, return no features | ||||||
|  |             .unwrap_or_else(FilterableAttributesFeatures::no_features) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn is_sortable(&self) -> bool { | ||||||
|  |         self.sortable | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn is_searchable(&self) -> bool { | ||||||
|  |         self.searchable.is_some() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn searchable_weight(&self) -> Option<Weight> { | ||||||
|  |         self.searchable | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn is_distinct(&self) -> bool { | ||||||
|  |         self.distinct | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn is_asc_desc(&self) -> bool { | ||||||
|  |         self.asc_desc | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn is_geo(&self) -> bool { | ||||||
|  |         self.geo | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns `true` if the field is part of the facet databases. (sortable, distinct, asc_desc, filterable or facet searchable) | ||||||
|  |     pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool { | ||||||
|  |         if self.is_distinct() || self.is_sortable() || self.is_asc_desc() { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let features = self.filterable_attributes_features(rules); | ||||||
|  |         if features.is_filterable() || features.is_facet_searchable() { | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         false | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn require_facet_level_database(&self, rules: &[FilterableAttributesRule]) -> bool { | ||||||
|  |         let features = self.filterable_attributes_features(rules); | ||||||
|  |  | ||||||
|  |         self.is_sortable() || self.is_asc_desc() || features.is_filterable_comparison() | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Debug, Clone)] | ||||||
| pub struct MetadataBuilder { | pub struct MetadataBuilder { | ||||||
|     searchable_attributes: Vec<String>, |     searchable_attributes: Option<Vec<String>>, | ||||||
|     filterable_attributes: HashSet<String>, |     filterable_attributes: Vec<FilterableAttributesRule>, | ||||||
|     sortable_attributes: HashSet<String>, |     sortable_attributes: HashSet<String>, | ||||||
|     localized_attributes: Option<Vec<LocalizedAttributesRule>>, |     localized_attributes: Option<Vec<LocalizedAttributesRule>>, | ||||||
|  |     distinct_attribute: Option<String>, | ||||||
|  |     asc_desc_attributes: HashSet<String>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl MetadataBuilder { | impl MetadataBuilder { | ||||||
|     pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> { |     pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> { | ||||||
|         let searchable_attributes = |         let searchable_attributes = match index.user_defined_searchable_fields(rtxn)? { | ||||||
|             index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect(); |             Some(fields) if fields.contains(&"*") => None, | ||||||
|         let filterable_attributes = index.filterable_fields(rtxn)?; |             None => None, | ||||||
|  |             Some(fields) => Some(fields.into_iter().map(|s| s.to_string()).collect()), | ||||||
|  |         }; | ||||||
|  |         let filterable_attributes = index.filterable_attributes_rules(rtxn)?; | ||||||
|         let sortable_attributes = index.sortable_fields(rtxn)?; |         let sortable_attributes = index.sortable_fields(rtxn)?; | ||||||
|         let localized_attributes = index.localized_attributes_rules(rtxn)?; |         let localized_attributes = index.localized_attributes_rules(rtxn)?; | ||||||
|  |         let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string()); | ||||||
|  |         let asc_desc_attributes = index.asc_desc_fields(rtxn)?; | ||||||
|  |  | ||||||
|         Ok(Self { |         Ok(Self { | ||||||
|             searchable_attributes, |             searchable_attributes, | ||||||
|             filterable_attributes, |             filterable_attributes, | ||||||
|             sortable_attributes, |             sortable_attributes, | ||||||
|             localized_attributes, |             localized_attributes, | ||||||
|  |             distinct_attribute, | ||||||
|  |             asc_desc_attributes, | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     #[cfg(test)] | ||||||
|  |     /// Build a new `MetadataBuilder` from the given parameters. | ||||||
|  |     /// | ||||||
|  |     /// This is used for testing, prefer using `MetadataBuilder::from_index` instead. | ||||||
|     pub fn new( |     pub fn new( | ||||||
|         searchable_attributes: Vec<String>, |         searchable_attributes: Option<Vec<String>>, | ||||||
|         filterable_attributes: HashSet<String>, |         filterable_attributes: Vec<FilterableAttributesRule>, | ||||||
|         sortable_attributes: HashSet<String>, |         sortable_attributes: HashSet<String>, | ||||||
|         localized_attributes: Option<Vec<LocalizedAttributesRule>>, |         localized_attributes: Option<Vec<LocalizedAttributesRule>>, | ||||||
|  |         distinct_attribute: Option<String>, | ||||||
|  |         asc_desc_attributes: HashSet<String>, | ||||||
|     ) -> Self { |     ) -> Self { | ||||||
|  |         let searchable_attributes = match searchable_attributes { | ||||||
|  |             Some(fields) if fields.iter().any(|f| f == "*") => None, | ||||||
|  |             None => None, | ||||||
|  |             Some(fields) => Some(fields), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         Self { |         Self { | ||||||
|             searchable_attributes, |             searchable_attributes, | ||||||
|             filterable_attributes, |             filterable_attributes, | ||||||
|             sortable_attributes, |             sortable_attributes, | ||||||
|             localized_attributes, |             localized_attributes, | ||||||
|  |             distinct_attribute, | ||||||
|  |             asc_desc_attributes, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn metadata_for_field(&self, field: &str) -> Metadata { |     pub fn metadata_for_field(&self, field: &str) -> Metadata { | ||||||
|         let searchable = self |         if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) { | ||||||
|             .searchable_attributes |             // Vectors fields are not searchable, filterable, distinct or asc_desc | ||||||
|  |             return Metadata { | ||||||
|  |                 searchable: None, | ||||||
|  |                 sortable: false, | ||||||
|  |                 distinct: false, | ||||||
|  |                 asc_desc: false, | ||||||
|  |                 geo: false, | ||||||
|  |                 localized_attributes_rule_id: None, | ||||||
|  |                 filterable_attributes_rule_id: None, | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // A field is sortable if it is faceted by a sortable attribute | ||||||
|  |         let sortable = self | ||||||
|  |             .sortable_attributes | ||||||
|             .iter() |             .iter() | ||||||
|             .any(|attribute| attribute == "*" || attribute == field); |             .any(|pattern| match_field_legacy(pattern, field) == PatternMatch::Match); | ||||||
|  |  | ||||||
|         let filterable = self.filterable_attributes.contains(field); |         let filterable_attributes_rule_id = self | ||||||
|  |             .filterable_attributes | ||||||
|  |             .iter() | ||||||
|  |             .position(|attribute| attribute.match_str(field) == PatternMatch::Match) | ||||||
|  |             // saturating_add(1): make `id` `NonZero` | ||||||
|  |             .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); | ||||||
|  |  | ||||||
|         let sortable = self.sortable_attributes.contains(field); |         if match_field_legacy(RESERVED_GEO_FIELD_NAME, field) == PatternMatch::Match { | ||||||
|  |             // Geo fields are not searchable, distinct or asc_desc | ||||||
|  |             return Metadata { | ||||||
|  |                 searchable: None, | ||||||
|  |                 sortable, | ||||||
|  |                 distinct: false, | ||||||
|  |                 asc_desc: false, | ||||||
|  |                 geo: true, | ||||||
|  |                 localized_attributes_rule_id: None, | ||||||
|  |                 filterable_attributes_rule_id, | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let searchable = match &self.searchable_attributes { | ||||||
|  |             // A field is searchable if it is faceted by a searchable attribute | ||||||
|  |             Some(attributes) => attributes | ||||||
|  |                 .iter() | ||||||
|  |                 .enumerate() | ||||||
|  |                 .find(|(_i, pattern)| is_faceted_by(field, pattern)) | ||||||
|  |                 .map(|(i, _)| i as u16), | ||||||
|  |             None => Some(0), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let distinct = | ||||||
|  |             self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field); | ||||||
|  |         let asc_desc = self.asc_desc_attributes.contains(field); | ||||||
|  |  | ||||||
|         let localized_attributes_rule_id = self |         let localized_attributes_rule_id = self | ||||||
|             .localized_attributes |             .localized_attributes | ||||||
|             .iter() |             .iter() | ||||||
|             .flat_map(|v| v.iter()) |             .flat_map(|v| v.iter()) | ||||||
|             .position(|rule| rule.match_str(field)) |             .position(|rule| rule.match_str(field) == PatternMatch::Match) | ||||||
|             // saturating_add(1): make `id` `NonZero` |             // saturating_add(1): make `id` `NonZero` | ||||||
|             .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); |             .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); | ||||||
|  |  | ||||||
|         Metadata { searchable, filterable, sortable, localized_attributes_rule_id } |         Metadata { | ||||||
|  |             searchable, | ||||||
|  |             sortable, | ||||||
|  |             distinct, | ||||||
|  |             asc_desc, | ||||||
|  |             geo: false, | ||||||
|  |             localized_attributes_rule_id, | ||||||
|  |             filterable_attributes_rule_id, | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn searchable_attributes(&self) -> &[String] { |     pub fn searchable_attributes(&self) -> Option<&[String]> { | ||||||
|         self.searchable_attributes.as_slice() |         self.searchable_attributes.as_deref() | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn sortable_attributes(&self) -> &HashSet<String> { |     pub fn sortable_attributes(&self) -> &HashSet<String> { | ||||||
|         &self.sortable_attributes |         &self.sortable_attributes | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn filterable_attributes(&self) -> &HashSet<String> { |     pub fn filterable_attributes(&self) -> &[FilterableAttributesRule] { | ||||||
|         &self.filterable_attributes |         &self.filterable_attributes | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
| use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | ||||||
| use std::convert::TryInto; |  | ||||||
| use std::fs::File; | use std::fs::File; | ||||||
| use std::path::Path; | use std::path::Path; | ||||||
|  |  | ||||||
| @@ -10,10 +9,11 @@ use roaring::RoaringBitmap; | |||||||
| use rstar::RTree; | use rstar::RTree; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME}; | use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; | ||||||
| use crate::database_stats::DatabaseStats; | use crate::database_stats::DatabaseStats; | ||||||
| use crate::documents::PrimaryKey; | use crate::documents::PrimaryKey; | ||||||
| use crate::error::{InternalError, UserError}; | use crate::error::{InternalError, UserError}; | ||||||
|  | use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||||
| use crate::fields_ids_map::FieldsIdsMap; | use crate::fields_ids_map::FieldsIdsMap; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, |     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, | ||||||
| @@ -27,8 +27,9 @@ use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; | |||||||
| use crate::{ | use crate::{ | ||||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, |     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, |     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, | ||||||
|     FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec, |     FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec, | ||||||
|     RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64, |     Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, | ||||||
|  |     BEU64, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; | pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; | ||||||
| @@ -738,8 +739,7 @@ impl Index { | |||||||
|         &self, |         &self, | ||||||
|         wtxn: &mut RwTxn<'_>, |         wtxn: &mut RwTxn<'_>, | ||||||
|         user_fields: &[&str], |         user_fields: &[&str], | ||||||
|         non_searchable_fields_ids: &[FieldId], |         fields_ids_map: &FieldIdMapWithMetadata, | ||||||
|         fields_ids_map: &FieldsIdsMap, |  | ||||||
|     ) -> Result<()> { |     ) -> Result<()> { | ||||||
|         // We can write the user defined searchable fields as-is. |         // We can write the user defined searchable fields as-is. | ||||||
|         self.put_user_defined_searchable_fields(wtxn, user_fields)?; |         self.put_user_defined_searchable_fields(wtxn, user_fields)?; | ||||||
| @@ -747,29 +747,17 @@ impl Index { | |||||||
|         let mut weights = FieldidsWeightsMap::default(); |         let mut weights = FieldidsWeightsMap::default(); | ||||||
|  |  | ||||||
|         // Now we generate the real searchable fields: |         // Now we generate the real searchable fields: | ||||||
|         // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. |  | ||||||
|         // 2. Iterate over the user defined searchable fields. |  | ||||||
|         // 3. If a user defined field is a subset of a field defined in the fields_ids_map |  | ||||||
|         // (ie doggo.name is a subset of doggo) right after doggo and with the same weight. |  | ||||||
|         let mut real_fields = Vec::new(); |         let mut real_fields = Vec::new(); | ||||||
|  |         for (id, field_from_map, metadata) in fields_ids_map.iter() { | ||||||
|         for (id, field_from_map) in fields_ids_map.iter() { |             if let Some(weight) = metadata.searchable_weight() { | ||||||
|             for (weight, user_field) in user_fields.iter().enumerate() { |  | ||||||
|                 if crate::is_faceted_by(field_from_map, user_field) |  | ||||||
|                     && !real_fields.contains(&field_from_map) |  | ||||||
|                     && !non_searchable_fields_ids.contains(&id) |  | ||||||
|                 { |  | ||||||
|                 real_fields.push(field_from_map); |                 real_fields.push(field_from_map); | ||||||
|  |  | ||||||
|                     let weight: u16 = |  | ||||||
|                         weight.try_into().map_err(|_| UserError::AttributeLimitReached)?; |  | ||||||
|                 weights.insert(id, weight); |                 weights.insert(id, weight); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         } |  | ||||||
|  |  | ||||||
|         self.put_searchable_fields(wtxn, &real_fields)?; |         self.put_searchable_fields(wtxn, &real_fields)?; | ||||||
|         self.put_fieldids_weights_map(wtxn, &weights)?; |         self.put_fieldids_weights_map(wtxn, &weights)?; | ||||||
|  |  | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -7,14 +7,14 @@ use liquid::model::{ | |||||||
| }; | }; | ||||||
| use liquid::{ObjectView, ValueView}; | use liquid::{ObjectView, ValueView}; | ||||||
|  |  | ||||||
| use super::{FieldMetadata, FieldsIdsMapWithMetadata}; | use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata}; | ||||||
| use crate::GlobalFieldsIdsMap; | use crate::GlobalFieldsIdsMap; | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy)] | #[derive(Debug, Clone, Copy)] | ||||||
| pub struct FieldValue<'a, D: ObjectView> { | pub struct FieldValue<'a, D: ObjectView> { | ||||||
|     name: &'a str, |     name: &'a str, | ||||||
|     document: &'a D, |     document: &'a D, | ||||||
|     metadata: FieldMetadata, |     metadata: Metadata, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> { | impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> { | ||||||
| @@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn is_searchable(&self) -> &bool { |     pub fn is_searchable(&self) -> &bool { | ||||||
|         &self.metadata.searchable |         match self.metadata.is_searchable() { | ||||||
|  |             true => &true, | ||||||
|  |             false => &false, | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn is_empty(&self) -> bool { |     pub fn is_empty(&self) -> bool { | ||||||
| @@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, D: ObjectView> OwnedFields<'a, D> { | impl<'a, D: ObjectView> OwnedFields<'a, D> { | ||||||
|     pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self { |     pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self { | ||||||
|         Self( |         Self( | ||||||
|             std::iter::repeat(document) |             std::iter::repeat(document) | ||||||
|                 .zip(field_id_map.iter()) |                 .zip(field_id_map.iter()) | ||||||
|                 .map(|(document, (fid, name))| FieldValue { |                 .map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata }) | ||||||
|                     document, |  | ||||||
|                     name, |  | ||||||
|                     metadata: field_id_map.metadata(fid).unwrap_or_default(), |  | ||||||
|                 }) |  | ||||||
|                 .collect(), |                 .collect(), | ||||||
|         ) |         ) | ||||||
|     } |     } | ||||||
| @@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> { | |||||||
|             let fv = self.doc_alloc.alloc(FieldValue { |             let fv = self.doc_alloc.alloc(FieldValue { | ||||||
|                 name: self.doc_alloc.alloc_str(&k), |                 name: self.doc_alloc.alloc_str(&k), | ||||||
|                 document: self.document, |                 document: self.document, | ||||||
|                 metadata: FieldMetadata { searchable: metadata.searchable }, |                 metadata, | ||||||
|             }); |             }); | ||||||
|             fv as _ |             fv as _ | ||||||
|         })) |         })) | ||||||
| @@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> { | |||||||
|         let fv = self.doc_alloc.alloc(FieldValue { |         let fv = self.doc_alloc.alloc(FieldValue { | ||||||
|             name: self.doc_alloc.alloc_str(&key), |             name: self.doc_alloc.alloc_str(&key), | ||||||
|             document: self.document, |             document: self.document, | ||||||
|             metadata: FieldMetadata { searchable: metadata.searchable }, |             metadata, | ||||||
|         }); |         }); | ||||||
|         Some(fv as _) |         Some(fv as _) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -5,11 +5,9 @@ mod fields; | |||||||
| mod template_checker; | mod template_checker; | ||||||
|  |  | ||||||
| use std::cell::RefCell; | use std::cell::RefCell; | ||||||
| use std::collections::BTreeMap; |  | ||||||
| use std::convert::TryFrom; | use std::convert::TryFrom; | ||||||
| use std::fmt::Debug; | use std::fmt::Debug; | ||||||
| use std::num::NonZeroUsize; | use std::num::NonZeroUsize; | ||||||
| use std::ops::Deref; |  | ||||||
|  |  | ||||||
| use bumpalo::Bump; | use bumpalo::Bump; | ||||||
| use document::ParseableDocument; | use document::ParseableDocument; | ||||||
| @@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields}; | |||||||
|  |  | ||||||
| use self::context::Context; | use self::context::Context; | ||||||
| use self::document::Document; | use self::document::Document; | ||||||
|  | use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; | ||||||
| use crate::update::del_add::DelAdd; | use crate::update::del_add::DelAdd; | ||||||
| use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap}; | use crate::GlobalFieldsIdsMap; | ||||||
|  |  | ||||||
| pub struct Prompt { | pub struct Prompt { | ||||||
|     template: liquid::Template, |     template: liquid::Template, | ||||||
| @@ -145,9 +144,9 @@ impl Prompt { | |||||||
|         &self, |         &self, | ||||||
|         document: &obkv::KvReaderU16, |         document: &obkv::KvReaderU16, | ||||||
|         side: DelAdd, |         side: DelAdd, | ||||||
|         field_id_map: &FieldsIdsMapWithMetadata, |         field_id_map: &FieldIdMapWithMetadata, | ||||||
|     ) -> Result<String, RenderPromptError> { |     ) -> Result<String, RenderPromptError> { | ||||||
|         let document = Document::new(document, side, field_id_map); |         let document = Document::new(document, side, field_id_map.as_fields_ids_map()); | ||||||
|         let fields = OwnedFields::new(&document, field_id_map); |         let fields = OwnedFields::new(&document, field_id_map); | ||||||
|         let context = Context::new(&document, &fields); |         let context = Context::new(&document, &fields); | ||||||
|  |  | ||||||
| @@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct FieldsIdsMapWithMetadata<'a> { |  | ||||||
|     fields_ids_map: &'a FieldsIdsMap, |  | ||||||
|     metadata: BTreeMap<FieldId, FieldMetadata>, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'a> FieldsIdsMapWithMetadata<'a> { |  | ||||||
|     pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self { |  | ||||||
|         let mut metadata: BTreeMap<FieldId, FieldMetadata> = |  | ||||||
|             fields_ids_map.ids().map(|id| (id, Default::default())).collect(); |  | ||||||
|         for searchable_field_id in searchable_fields_ids { |  | ||||||
|             let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue }; |  | ||||||
|             metadata.searchable = true; |  | ||||||
|         } |  | ||||||
|         Self { fields_ids_map, metadata } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> { |  | ||||||
|         self.metadata.get(&field_id).copied() |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'a> Deref for FieldsIdsMapWithMetadata<'a> { |  | ||||||
|     type Target = FieldsIdsMap; |  | ||||||
|  |  | ||||||
|     fn deref(&self) -> &Self::Target { |  | ||||||
|         self.fields_ids_map |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[derive(Debug, Default, Clone, Copy)] |  | ||||||
| pub struct FieldMetadata { |  | ||||||
|     pub searchable: bool, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod test { | mod test { | ||||||
|     use super::Prompt; |     use super::Prompt; | ||||||
|   | |||||||
| @@ -1,4 +1,4 @@ | |||||||
| use std::collections::{BTreeMap, HashMap, HashSet}; | use std::collections::{BTreeMap, BTreeSet, HashMap}; | ||||||
| use std::fmt::Display; | use std::fmt::Display; | ||||||
| use std::ops::ControlFlow; | use std::ops::ControlFlow; | ||||||
| use std::{fmt, mem}; | use std::{fmt, mem}; | ||||||
| @@ -9,8 +9,9 @@ use indexmap::IndexMap; | |||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde::{Deserialize, Serialize}; | use serde::{Deserialize, Serialize}; | ||||||
|  |  | ||||||
| use crate::error::UserError; | use crate::attribute_patterns::match_field_legacy; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
|  | use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata, MetadataBuilder}; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, |     FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, | ||||||
| }; | }; | ||||||
| @@ -18,7 +19,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec}; | |||||||
| use crate::search::facet::facet_distribution_iter::{ | use crate::search::facet::facet_distribution_iter::{ | ||||||
|     count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, |     count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, | ||||||
| }; | }; | ||||||
| use crate::{FieldId, Index, Result}; | use crate::{Error, FieldId, FilterableAttributesRule, Index, PatternMatch, Result, UserError}; | ||||||
|  |  | ||||||
| /// The default number of values by facets that will | /// The default number of values by facets that will | ||||||
| /// be fetched from the key-value store. | /// be fetched from the key-value store. | ||||||
| @@ -287,37 +288,23 @@ impl<'a> FacetDistribution<'a> { | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> { |     pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> { | ||||||
|         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; |  | ||||||
|         let filterable_fields = self.index.filterable_fields(self.rtxn)?; |  | ||||||
|         let candidates = if let Some(candidates) = self.candidates.clone() { |         let candidates = if let Some(candidates) = self.candidates.clone() { | ||||||
|             candidates |             candidates | ||||||
|         } else { |         } else { | ||||||
|             return Ok(Default::default()); |             return Ok(Default::default()); | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let fields = match &self.facets { |         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||||
|             Some(facets) => { |         let fields_ids_map = FieldIdMapWithMetadata::new( | ||||||
|                 let invalid_fields: HashSet<_> = facets |             fields_ids_map, | ||||||
|                     .iter() |             MetadataBuilder::from_index(self.index, self.rtxn)?, | ||||||
|                     .map(|(name, _)| name) |         ); | ||||||
|                     .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) |         let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; | ||||||
|                     .collect(); |         self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; | ||||||
|                 if !invalid_fields.is_empty() { |  | ||||||
|                     return Err(UserError::InvalidFacetsDistribution { |  | ||||||
|                         invalid_facets_name: invalid_fields.into_iter().cloned().collect(), |  | ||||||
|                         valid_facets_name: filterable_fields.into_iter().collect(), |  | ||||||
|                     } |  | ||||||
|                     .into()); |  | ||||||
|                 } else { |  | ||||||
|                     facets.iter().map(|(name, _)| name).cloned().collect() |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             None => filterable_fields, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let mut distribution = BTreeMap::new(); |         let mut distribution = BTreeMap::new(); | ||||||
|         for (fid, name) in fields_ids_map.iter() { |         for (fid, name, metadata) in fields_ids_map.iter() { | ||||||
|             if crate::is_faceted(name, &fields) { |             if self.select_field(name, &metadata, &filterable_attributes_rules) { | ||||||
|                 let min_value = if let Some(min_value) = crate::search::facet::facet_min_value( |                 let min_value = if let Some(min_value) = crate::search::facet::facet_min_value( | ||||||
|                     self.index, |                     self.index, | ||||||
|                     self.rtxn, |                     self.rtxn, | ||||||
| @@ -348,31 +335,16 @@ impl<'a> FacetDistribution<'a> { | |||||||
|  |  | ||||||
|     pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> { |     pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> { | ||||||
|         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; |         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||||
|         let filterable_fields = self.index.filterable_fields(self.rtxn)?; |         let fields_ids_map = FieldIdMapWithMetadata::new( | ||||||
|  |             fields_ids_map, | ||||||
|         let fields = match self.facets { |             MetadataBuilder::from_index(self.index, self.rtxn)?, | ||||||
|             Some(ref facets) => { |         ); | ||||||
|                 let invalid_fields: HashSet<_> = facets |         let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; | ||||||
|                     .iter() |         self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; | ||||||
|                     .map(|(name, _)| name) |  | ||||||
|                     .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) |  | ||||||
|                     .collect(); |  | ||||||
|                 if !invalid_fields.is_empty() { |  | ||||||
|                     return Err(UserError::InvalidFacetsDistribution { |  | ||||||
|                         invalid_facets_name: invalid_fields.into_iter().cloned().collect(), |  | ||||||
|                         valid_facets_name: filterable_fields.into_iter().collect(), |  | ||||||
|                     } |  | ||||||
|                     .into()); |  | ||||||
|                 } else { |  | ||||||
|                     facets.iter().map(|(name, _)| name).cloned().collect() |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             None => filterable_fields, |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let mut distribution = BTreeMap::new(); |         let mut distribution = BTreeMap::new(); | ||||||
|         for (fid, name) in fields_ids_map.iter() { |         for (fid, name, metadata) in fields_ids_map.iter() { | ||||||
|             if crate::is_faceted(name, &fields) { |             if self.select_field(name, &metadata, &filterable_attributes_rules) { | ||||||
|                 let order_by = self |                 let order_by = self | ||||||
|                     .facets |                     .facets | ||||||
|                     .as_ref() |                     .as_ref() | ||||||
| @@ -385,6 +357,61 @@ impl<'a> FacetDistribution<'a> { | |||||||
|  |  | ||||||
|         Ok(distribution) |         Ok(distribution) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Select a field if it is faceted and in the facets. | ||||||
|  |     fn select_field( | ||||||
|  |         &self, | ||||||
|  |         name: &str, | ||||||
|  |         metadata: &Metadata, | ||||||
|  |         filterable_attributes_rules: &[FilterableAttributesRule], | ||||||
|  |     ) -> bool { | ||||||
|  |         match &self.facets { | ||||||
|  |             Some(facets) => { | ||||||
|  |                 // The list of facets provided by the user is a legacy pattern ("dog.age" must be selected with "dog"). | ||||||
|  |                 facets.keys().any(|key| match_field_legacy(key, name) == PatternMatch::Match) | ||||||
|  |             } | ||||||
|  |             None => metadata.is_faceted(filterable_attributes_rules), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Check if the fields in the facets are valid faceted fields. | ||||||
|  |     fn check_faceted_fields( | ||||||
|  |         &self, | ||||||
|  |         fields_ids_map: &FieldIdMapWithMetadata, | ||||||
|  |         filterable_attributes_rules: &[FilterableAttributesRule], | ||||||
|  |     ) -> Result<()> { | ||||||
|  |         let mut invalid_facets = BTreeSet::new(); | ||||||
|  |         if let Some(facets) = &self.facets { | ||||||
|  |             for (field, _) in facets { | ||||||
|  |                 let is_valid_faceted_field = | ||||||
|  |                     fields_ids_map.id_with_metadata(field).map_or(false, |(_, metadata)| { | ||||||
|  |                         metadata.is_faceted(filterable_attributes_rules) | ||||||
|  |                     }); | ||||||
|  |                 if !is_valid_faceted_field { | ||||||
|  |                     invalid_facets.insert(field.to_string()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if !invalid_facets.is_empty() { | ||||||
|  |             let valid_facets_name = fields_ids_map | ||||||
|  |                 .iter() | ||||||
|  |                 .filter_map(|(_, name, metadata)| { | ||||||
|  |                     if metadata.is_faceted(filterable_attributes_rules) { | ||||||
|  |                         Some(name.to_string()) | ||||||
|  |                     } else { | ||||||
|  |                         None | ||||||
|  |                     } | ||||||
|  |                 }) | ||||||
|  |                 .collect(); | ||||||
|  |             return Err(Error::UserError(UserError::InvalidFacetsDistribution { | ||||||
|  |                 invalid_facets_name: invalid_facets, | ||||||
|  |                 valid_facets_name, | ||||||
|  |             })); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(()) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl fmt::Debug for FacetDistribution<'_> { | impl fmt::Debug for FacetDistribution<'_> { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user