mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Stops returning an option in the internal searchable fields
This commit is contained in:
		
							
								
								
									
										28
									
								
								milli/src/fieldids_weights_map.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								milli/src/fieldids_weights_map.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,28 @@ | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use serde::{Deserialize, Serialize}; | ||||
|  | ||||
| use crate::{FieldId, Weight}; | ||||
|  | ||||
| /// Associates field ids with their weights. | ||||
| /// | ||||
| /// The type derives `Serialize` and `Deserialize`, so the whole map can be | ||||
| /// encoded and decoded as a single value. | ||||
| #[derive(Debug, Default, Serialize, Deserialize)] | ||||
| pub struct FieldidsWeightsMap { | ||||
|     /// Weight currently recorded for each field id. | ||||
|     map: HashMap<FieldId, Weight>, | ||||
| } | ||||
|  | ||||
| impl FieldidsWeightsMap { | ||||
|     /// Records `weight` for the field `fid`. | ||||
|     /// | ||||
|     /// Returns the weight previously recorded for `fid`, or `None` if there was none. | ||||
|     pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> { | ||||
|         self.map.insert(fid, weight) | ||||
|     } | ||||
|  | ||||
|     /// Removes the entry of the field `fid`. | ||||
|     /// | ||||
|     /// Returns the weight that was recorded for `fid`, or `None` if there was none. | ||||
|     pub fn remove(&mut self, fid: FieldId) -> Option<Weight> { | ||||
|         self.map.remove(&fid) | ||||
|     } | ||||
|  | ||||
|     /// Returns the weight recorded for the field `fid`, or `None` if it has no entry. | ||||
|     pub fn weight(&self, fid: FieldId) -> Option<Weight> { | ||||
|         self.map.get(&fid).copied() | ||||
|     } | ||||
|  | ||||
|     /// Returns the largest weight recorded in the map, or `None` if the map is empty. | ||||
|     pub fn max_weight(&self) -> Option<Weight> { | ||||
|         self.map.values().copied().max() | ||||
|     } | ||||
| } | ||||
| @@ -1,5 +1,6 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | ||||
| use std::convert::TryInto; | ||||
| use std::fs::File; | ||||
| use std::path::Path; | ||||
|  | ||||
| @@ -25,8 +26,9 @@ use crate::proximity::ProximityPrecision; | ||||
| use crate::vector::EmbeddingConfig; | ||||
| use crate::{ | ||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, | ||||
|     Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64, | ||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, FieldidsWeightsMap, | ||||
|     GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, | ||||
|     BEU16, BEU32, BEU64, | ||||
| }; | ||||
|  | ||||
| pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; | ||||
| @@ -42,6 +44,7 @@ pub mod main_key { | ||||
|     pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields"; | ||||
|     pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution"; | ||||
|     pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; | ||||
|     pub const FIELDIDS_WEIGHTS_MAP_KEY: &str = "fieldids-weights-map"; | ||||
|     pub const GEO_FACETED_DOCUMENTS_IDS_KEY: &str = "geo-faceted-documents-ids"; | ||||
|     pub const GEO_RTREE_KEY: &str = "geo-rtree"; | ||||
|     pub const PRIMARY_KEY_KEY: &str = "primary-key"; | ||||
| @@ -414,6 +417,32 @@ impl Index { | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
|     /* fieldids weights map */ | ||||
|     // This maps the fields ids to their weights. | ||||
|     // Their weights are defined by the ordering of the searchable attributes. | ||||
|  | ||||
|     /// Writes the fieldids weights map, which associates the field ids to their weights. | ||||
|     /// | ||||
|     /// The map is stored in `self.main` under `main_key::FIELDIDS_WEIGHTS_MAP_KEY`, | ||||
|     /// using the `Str` codec for the key and `SerdeJson` for the value. | ||||
|     pub(crate) fn put_fieldids_weights_map( | ||||
|         &self, | ||||
|         wtxn: &mut RwTxn, | ||||
|         map: &FieldidsWeightsMap, | ||||
|     ) -> heed::Result<()> { | ||||
|         self.main.remap_types::<Str, SerdeJson<_>>().put( | ||||
|             wtxn, | ||||
|             main_key::FIELDIDS_WEIGHTS_MAP_KEY, | ||||
|             map, | ||||
|         ) | ||||
|     } | ||||
|  | ||||
|     /// Gets the fieldids weights map, which associates the field ids to their weights. | ||||
|     /// | ||||
|     /// The map is read from `self.main` under `main_key::FIELDIDS_WEIGHTS_MAP_KEY`. | ||||
|     /// When nothing is stored under that key, `FieldidsWeightsMap::default()` is returned. | ||||
|     pub fn fieldids_weights_map(&self, rtxn: &RoTxn) -> heed::Result<FieldidsWeightsMap> { | ||||
|         Ok(self | ||||
|             .main | ||||
|             .remap_types::<Str, SerdeJson<_>>() | ||||
|             .get(rtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)? | ||||
|             .unwrap_or_default()) | ||||
|     } | ||||
|  | ||||
|     /* geo rtree */ | ||||
|  | ||||
|     /// Writes the provided `rtree` which associates coordinates to documents ids. | ||||
| @@ -578,10 +607,12 @@ impl Index { | ||||
|         wtxn: &mut RwTxn, | ||||
|         user_fields: &[&str], | ||||
|         fields_ids_map: &FieldsIdsMap, | ||||
|     ) -> heed::Result<()> { | ||||
|     ) -> Result<()> { | ||||
|         // We can write the user defined searchable fields as-is. | ||||
|         self.put_user_defined_searchable_fields(wtxn, user_fields)?; | ||||
|  | ||||
|         let mut weights = self.fieldids_weights_map(&wtxn)?; | ||||
|  | ||||
|         // Now we generate the real searchable fields: | ||||
|         // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. | ||||
|         // 2. Iterate over the user defined searchable fields. | ||||
| @@ -589,17 +620,23 @@ impl Index { | ||||
|         // (ie doggo.name is a subset of doggo) then we push it at the end of the fields. | ||||
|         let mut real_fields = user_fields.to_vec(); | ||||
|  | ||||
|         for field_from_map in fields_ids_map.names() { | ||||
|             for user_field in user_fields { | ||||
|         for (id, field_from_map) in fields_ids_map.iter() { | ||||
|             for (weight, user_field) in user_fields.iter().enumerate() { | ||||
|                 if crate::is_faceted_by(field_from_map, user_field) | ||||
|                     && !user_fields.contains(&field_from_map) | ||||
|                 { | ||||
|                     real_fields.push(field_from_map); | ||||
|  | ||||
|                     let weight: u16 = | ||||
|                         weight.try_into().map_err(|_| UserError::AttributeLimitReached)?; | ||||
|                     weights.insert(id, weight as u16); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         self.put_searchable_fields(wtxn, &real_fields) | ||||
|         self.put_searchable_fields(wtxn, &real_fields)?; | ||||
|         self.put_fieldids_weights_map(wtxn, &weights)?; | ||||
|         Ok(()) | ||||
|     } | ||||
|  | ||||
|     pub(crate) fn delete_all_searchable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { | ||||
| @@ -623,28 +660,31 @@ impl Index { | ||||
|     } | ||||
|  | ||||
|     /// Returns the searchable fields, those are the fields that are indexed, | ||||
|     /// if the searchable fields aren't there it means that **all** the fields are indexed. | ||||
|     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Option<Vec<&'t str>>> { | ||||
|     pub fn searchable_fields<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<Vec<Cow<'t, str>>> { | ||||
|         self.main | ||||
|             .remap_types::<Str, SerdeBincode<Vec<&'t str>>>() | ||||
|             .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY) | ||||
|             .get(rtxn, main_key::SEARCHABLE_FIELDS_KEY)? | ||||
|             .map(|fields| Ok(fields.into_iter().map(|field| Cow::Borrowed(field)).collect())) | ||||
|             .unwrap_or_else(|| { | ||||
|                 Ok(self | ||||
|                     .fields_ids_map(rtxn)? | ||||
|                     .names() | ||||
|                     .map(|field| Cow::Owned(field.to_string())) | ||||
|                     .collect()) | ||||
|             }) | ||||
|     } | ||||
|  | ||||
|     /// Identical to `searchable_fields`, but returns the ids instead. | ||||
|     pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Option<Vec<FieldId>>> { | ||||
|         match self.searchable_fields(rtxn)? { | ||||
|             Some(fields) => { | ||||
|     pub fn searchable_fields_ids(&self, rtxn: &RoTxn) -> Result<Vec<FieldId>> { | ||||
|         let fields = self.searchable_fields(rtxn)?; | ||||
|         let fields_ids_map = self.fields_ids_map(rtxn)?; | ||||
|         let mut fields_ids = Vec::new(); | ||||
|         for name in fields { | ||||
|                     if let Some(field_id) = fields_ids_map.id(name) { | ||||
|             if let Some(field_id) = fields_ids_map.id(&name) { | ||||
|                 fields_ids.push(field_id); | ||||
|             } | ||||
|         } | ||||
|                 Ok(Some(fields_ids)) | ||||
|             } | ||||
|             None => Ok(None), | ||||
|         } | ||||
|         Ok(fields_ids) | ||||
|     } | ||||
|  | ||||
|     /// Writes the searchable fields, when this list is specified, only these are indexed. | ||||
| @@ -1710,7 +1750,11 @@ pub(crate) mod tests { | ||||
|             ])) | ||||
|             .unwrap(); | ||||
|  | ||||
|         db_snap!(index, field_distribution, 1); | ||||
|         db_snap!(index, field_distribution, @r###" | ||||
|         age              1      | | ||||
|         id               2      | | ||||
|         name             2      | | ||||
|         "###); | ||||
|  | ||||
|         db_snap!(index, word_docids, | ||||
|         @r###" | ||||
| @@ -1722,18 +1766,6 @@ pub(crate) mod tests { | ||||
|         "### | ||||
|         ); | ||||
|  | ||||
|         db_snap!(index, field_distribution); | ||||
|  | ||||
|         db_snap!(index, field_distribution, | ||||
|             @r###" | ||||
|         age              1      | | ||||
|         id               2      | | ||||
|         name             2      | | ||||
|         "### | ||||
|         ); | ||||
|  | ||||
|         // snapshot_index!(&index, "1", include: "^field_distribution$"); | ||||
|  | ||||
|         // we add all the documents a second time. we are supposed to get the same | ||||
|         // field_distribution in the end | ||||
|         index | ||||
| @@ -1820,7 +1852,7 @@ pub(crate) mod tests { | ||||
|         // ensure we get the right real searchable fields + user defined searchable fields | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let real = index.searchable_fields(&rtxn).unwrap().unwrap(); | ||||
|         let real = index.searchable_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]); | ||||
|  | ||||
|         let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); | ||||
| @@ -1840,7 +1872,7 @@ pub(crate) mod tests { | ||||
|         // ensure we get the right real searchable fields + user defined searchable fields | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let real = index.searchable_fields(&rtxn).unwrap().unwrap(); | ||||
|         let real = index.searchable_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(real, &["doggo", "name"]); | ||||
|         let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); | ||||
|         assert_eq!(user_defined, &["doggo", "name"]); | ||||
| @@ -1856,7 +1888,7 @@ pub(crate) mod tests { | ||||
|         // ensure we get the right real searchable fields + user defined searchable fields | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|  | ||||
|         let real = index.searchable_fields(&rtxn).unwrap().unwrap(); | ||||
|         let real = index.searchable_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(real, &["doggo", "name", "doggo.name", "doggo.age"]); | ||||
|  | ||||
|         let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); | ||||
|   | ||||
| @@ -28,6 +28,7 @@ pub mod vector; | ||||
| #[cfg(test)] | ||||
| #[macro_use] | ||||
| pub mod snapshot_tests; | ||||
| mod fieldids_weights_map; | ||||
|  | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::convert::{TryFrom, TryInto}; | ||||
| @@ -52,6 +53,7 @@ pub use self::error::{ | ||||
|     Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError, | ||||
| }; | ||||
| pub use self::external_documents_ids::ExternalDocumentsIds; | ||||
| pub use self::fieldids_weights_map::FieldidsWeightsMap; | ||||
| pub use self::fields_ids_map::FieldsIdsMap; | ||||
| pub use self::heed_codec::{ | ||||
|     BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, | ||||
| @@ -77,6 +79,7 @@ pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; | ||||
| pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>; | ||||
| pub type FieldDistribution = BTreeMap<String, u64>; | ||||
| pub type FieldId = u16; | ||||
| pub type Weight = u16; | ||||
| pub type Object = serde_json::Map<String, serde_json::Value>; | ||||
| pub type Position = u32; | ||||
| pub type RelativePosition = u16; | ||||
|   | ||||
| @@ -315,11 +315,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                         .map_err(heed::Error::Decoding)? | ||||
|                 } else { | ||||
|                     // Compute the distance at the attribute level and store it in the cache. | ||||
|                     let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { | ||||
|                         fids | ||||
|                     } else { | ||||
|                         self.index.fields_ids_map(self.txn)?.ids().collect() | ||||
|                     }; | ||||
|                     let fids = self.index.searchable_fields_ids(self.txn)?; | ||||
|                     let mut docids = RoaringBitmap::new(); | ||||
|                     for fid in fids { | ||||
|                         // for each field, intersect left word bitmap and right word bitmap, | ||||
| @@ -408,11 +404,7 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|             let prefix_docids = match proximity_precision { | ||||
|                 ProximityPrecision::ByAttribute => { | ||||
|                     // Compute the distance at the attribute level and store it in the cache. | ||||
|                     let fids = if let Some(fids) = self.index.searchable_fields_ids(self.txn)? { | ||||
|                         fids | ||||
|                     } else { | ||||
|                         self.index.fields_ids_map(self.txn)?.ids().collect() | ||||
|                     }; | ||||
|                     let fids = self.index.searchable_fields_ids(self.txn)?; | ||||
|                     let mut prefix_docids = RoaringBitmap::new(); | ||||
|                     // for each field, intersect left word bitmap and right word bitmap, | ||||
|                     // then merge the result in a global bitmap before storing it in the cache. | ||||
|   | ||||
| @@ -184,13 +184,7 @@ impl State { | ||||
|             return Ok(State::Empty(query_graph.clone())); | ||||
|         } | ||||
|  | ||||
|         let searchable_fields_ids = { | ||||
|             if let Some(fids) = ctx.index.searchable_fields_ids(ctx.txn)? { | ||||
|                 fids | ||||
|             } else { | ||||
|                 ctx.index.fields_ids_map(ctx.txn)?.ids().collect() | ||||
|             } | ||||
|         }; | ||||
|         let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?; | ||||
|  | ||||
|         let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len()); | ||||
|         // then check that there exists at least one attribute that has all of the terms | ||||
|   | ||||
| @@ -96,27 +96,22 @@ impl<'ctx> SearchContext<'ctx> { | ||||
|                 contains_wildcard = true; | ||||
|                 continue; | ||||
|             } | ||||
|             let searchable_contains_name = | ||||
|                 searchable_names.as_ref().map(|sn| sn.iter().any(|name| name == field_name)); | ||||
|             let searchable_contains_name = searchable_names.iter().any(|name| name == field_name); | ||||
|             let fid = match (fids_map.id(field_name), searchable_contains_name) { | ||||
|                 // The Field id exist and the field is searchable | ||||
|                 (Some(fid), Some(true)) | (Some(fid), None) => fid, | ||||
|                 (Some(fid), true) => fid, | ||||
|                 // The field is searchable but the Field id doesn't exist => Internal Error | ||||
|                 (None, Some(true)) => { | ||||
|                 (None, true) => { | ||||
|                     return Err(FieldIdMapMissingEntry::FieldName { | ||||
|                         field_name: field_name.to_string(), | ||||
|                         process: "search", | ||||
|                     } | ||||
|                     .into()) | ||||
|                 } | ||||
|                 // The field is not searchable, but the searchableAttributes are set to * => ignore field | ||||
|                 (None, None) => continue, | ||||
|                 // The field is not searchable => User error | ||||
|                 (_fid, Some(false)) => { | ||||
|                     let (valid_fields, hidden_fields) = match searchable_names { | ||||
|                         Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?, | ||||
|                         None => self.index.remove_hidden_fields(self.txn, fids_map.names())?, | ||||
|                     }; | ||||
|                 (_fid, false) => { | ||||
|                     let (valid_fields, hidden_fields) = | ||||
|                         self.index.remove_hidden_fields(self.txn, searchable_names)?; | ||||
|  | ||||
|                     let field = field_name.to_string(); | ||||
|                     return Err(UserError::InvalidSearchableAttribute { | ||||
|   | ||||
| @@ -77,17 +77,7 @@ impl RankingRuleGraphTrait for FidGraph { | ||||
|         } | ||||
|  | ||||
|         // always lookup the max_fid if we don't already and add an artificial condition for max scoring | ||||
|         let max_fid: Option<u16> = { | ||||
|             if let Some(max_fid) = ctx | ||||
|                 .index | ||||
|                 .searchable_fields_ids(ctx.txn)? | ||||
|                 .map(|field_ids| field_ids.into_iter().max()) | ||||
|             { | ||||
|                 max_fid | ||||
|             } else { | ||||
|                 ctx.index.fields_ids_map(ctx.txn)?.ids().max() | ||||
|             } | ||||
|         }; | ||||
|         let max_fid: Option<u16> = ctx.index.searchable_fields_ids(ctx.txn)?.into_iter().max(); | ||||
|  | ||||
|         if let Some(max_fid) = max_fid { | ||||
|             if !all_fields.contains(&max_fid) { | ||||
|   | ||||
| @@ -186,7 +186,7 @@ fn searchable_fields_changed( | ||||
| ) -> bool { | ||||
|     let searchable_fields = &settings_diff.new.searchable_fields_ids; | ||||
|     for (field_id, field_bytes) in obkv.iter() { | ||||
|         if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) { | ||||
|         if searchable_fields.contains(&field_id) { | ||||
|             let del_add = KvReaderDelAdd::new(field_bytes); | ||||
|             match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) { | ||||
|                 // if both fields are None, check the next field. | ||||
| @@ -298,7 +298,7 @@ fn lang_safe_tokens_from_document<'a>( | ||||
| /// Extract words mapped with their positions of a document. | ||||
| fn tokens_from_document<'a>( | ||||
|     obkv: &KvReader<FieldId>, | ||||
|     searchable_fields: &Option<Vec<FieldId>>, | ||||
|     searchable_fields: &[FieldId], | ||||
|     tokenizer: &Tokenizer, | ||||
|     max_positions_per_attributes: u32, | ||||
|     del_add: DelAdd, | ||||
| @@ -309,7 +309,7 @@ fn tokens_from_document<'a>( | ||||
|     let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer); | ||||
|     for (field_id, field_bytes) in obkv.iter() { | ||||
|         // if field is searchable. | ||||
|         if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) { | ||||
|         if searchable_fields.as_ref().contains(&field_id) { | ||||
|             // extract deletion or addition only. | ||||
|             if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) { | ||||
|                 // parse json. | ||||
|   | ||||
| @@ -468,14 +468,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||
|             Setting::Set(ref fields) => { | ||||
|                 // Check to see if the searchable fields changed before doing anything else | ||||
|                 let old_fields = self.index.searchable_fields(self.wtxn)?; | ||||
|                 let did_change = match old_fields { | ||||
|                     // If old_fields is Some, let's check to see if the fields actually changed | ||||
|                     Some(old_fields) => { | ||||
|                 let did_change = { | ||||
|                     let new_fields = fields.iter().map(String::as_str).collect::<Vec<_>>(); | ||||
|                     new_fields != old_fields | ||||
|                     } | ||||
|                     // If old_fields is None, the fields have changed (because they are being set) | ||||
|                     None => true, | ||||
|                 }; | ||||
|                 if !did_change { | ||||
|                     return Ok(false); | ||||
| @@ -1172,7 +1167,7 @@ pub(crate) struct InnerIndexSettings { | ||||
|     pub user_defined_faceted_fields: HashSet<String>, | ||||
|     pub user_defined_searchable_fields: Option<Vec<String>>, | ||||
|     pub faceted_fields_ids: HashSet<FieldId>, | ||||
|     pub searchable_fields_ids: Option<Vec<FieldId>>, | ||||
|     pub searchable_fields_ids: Vec<FieldId>, | ||||
|     pub exact_attributes: HashSet<FieldId>, | ||||
|     pub proximity_precision: ProximityPrecision, | ||||
|     pub embedding_configs: EmbeddingConfigs, | ||||
| @@ -1517,6 +1512,7 @@ mod tests { | ||||
|     use big_s::S; | ||||
|     use heed::types::Bytes; | ||||
|     use maplit::{btreemap, btreeset, hashset}; | ||||
|     use meili_snap::snapshot; | ||||
|  | ||||
|     use super::*; | ||||
|     use crate::error::Error; | ||||
| @@ -1576,7 +1572,7 @@ mod tests { | ||||
|         // Check that the searchable fields have been reset and documents are found now. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let searchable_fields = index.searchable_fields(&rtxn).unwrap(); | ||||
|         assert_eq!(searchable_fields, None); | ||||
|         snapshot!(format!("{searchable_fields:?}"), @r###"["name", "id", "age"]"###); | ||||
|         let result = index.search(&rtxn).query("23").execute().unwrap(); | ||||
|         assert_eq!(result.documents_ids.len(), 1); | ||||
|         let documents = index.documents(&rtxn, result.documents_ids).unwrap(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user