mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Introduce a struct to compute facets values
This commit is contained in:
		| @@ -9,7 +9,7 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::FieldsIdsMap; | ||||
| use crate::{default_criteria, Criterion, Search}; | ||||
| use crate::{default_criteria, Criterion, Search, FacetDistribution}; | ||||
| use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds}; | ||||
| use crate::{ | ||||
|     RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec, | ||||
| @@ -351,6 +351,10 @@ impl Index { | ||||
|         Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?) | ||||
|     } | ||||
|  | ||||
|     /// Returns a [`FacetDistribution`] created from the given read transaction | ||||
|     /// and this index. | ||||
|     pub fn facets<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> { | ||||
|         FacetDistribution::new(rtxn, self) | ||||
|     } | ||||
|  | ||||
|     /// Returns a [`Search`] created from the given read transaction and this index. | ||||
|     pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> { | ||||
|         Search::new(rtxn, self) | ||||
|     } | ||||
|   | ||||
| @@ -28,7 +28,7 @@ pub use self::fields_ids_map::FieldsIdsMap; | ||||
| pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec}; | ||||
| pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; | ||||
| pub use self::index::Index; | ||||
| pub use self::search::{Search, FacetCondition, SearchResult}; | ||||
| pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult}; | ||||
| pub use self::update_store::UpdateStore; | ||||
|  | ||||
| pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; | ||||
|   | ||||
							
								
								
									
										106
									
								
								src/search/facet/facet_distribution.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								src/search/facet/facet_distribution.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| use std::collections::{HashSet, HashMap}; | ||||
| use std::fmt; | ||||
| use std::ops::Bound::Unbounded; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; | ||||
| use crate::search::facet::FacetRange; | ||||
| use crate::{Index, FieldId}; | ||||
|  | ||||
| /// Builder that lists, for the faceted fields of an index, the facet values | ||||
| /// stored in the facet database, optionally limited to candidate documents. | ||||
| pub struct FacetDistribution<'a> { | ||||
|     /// Facet names to report; `None` selects every faceted field. | ||||
|     facets: Option<HashSet<String>>, | ||||
|     /// When set, a value is only reported if its documents intersect this bitmap. | ||||
|     candidates: Option<RoaringBitmap>, | ||||
|     rtxn: &'a heed::RoTxn<'a>, | ||||
|     index: &'a Index, | ||||
| } | ||||
|  | ||||
| impl<'a> FacetDistribution<'a> { | ||||
|     /// Creates a distribution with no facet selection and no candidates filter. | ||||
|     pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> { | ||||
|         FacetDistribution { | ||||
|             rtxn, | ||||
|             index, | ||||
|             facets: None, | ||||
|             candidates: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Restricts the reported values to those shared with `candidates`. | ||||
|     pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self { | ||||
|         self.candidates = Some(candidates); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Selects the facets to report by name, replacing any previous selection. | ||||
|     pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self { | ||||
|         let selected = names.into_iter().map(|name| name.as_ref().to_owned()).collect(); | ||||
|         self.facets = Some(selected); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Gathers the values of the facet `field_id`, decoded according to `field_type`. | ||||
|     /// | ||||
|     /// A value is kept when no candidates are set, or when the documents ids | ||||
|     /// stored with it are not disjoint from the candidates. | ||||
|     fn facet_values(&self, field_id: FieldId, field_type: FacetType) -> heed::Result<Vec<Value>> { | ||||
|         let database = self.index.facet_field_id_value_docids; | ||||
|         let entries = match field_type { | ||||
|             FacetType::String => { | ||||
|                 let strings = database | ||||
|                     .prefix_iter(&self.rtxn, &[field_id])? | ||||
|                     .remap_key_type::<FacetValueStringCodec>() | ||||
|                     .map(|entry| entry.map(|((_, text), docids)| (Value::from(text), docids))); | ||||
|                 Box::new(strings) as Box::<dyn Iterator<Item=_>> | ||||
|             }, | ||||
|             FacetType::Integer => { | ||||
|                 // NOTE(review): the `0` is presumably the facet level; confirm against `FacetRange::new`. | ||||
|                 let integers = FacetRange::<i64, _>::new( | ||||
|                     self.rtxn, | ||||
|                     database.remap_key_type::<FacetLevelValueI64Codec>(), | ||||
|                     field_id, | ||||
|                     0, | ||||
|                     Unbounded, | ||||
|                     Unbounded, | ||||
|                 )?; | ||||
|                 Box::new(integers.map(|entry| { | ||||
|                     entry.map(|((_, _, number, _), docids)| (Value::from(number), docids)) | ||||
|                 })) | ||||
|             }, | ||||
|             FacetType::Float => { | ||||
|                 let floats = FacetRange::<f64, _>::new( | ||||
|                     self.rtxn, | ||||
|                     database.remap_key_type::<FacetLevelValueF64Codec>(), | ||||
|                     field_id, | ||||
|                     0, | ||||
|                     Unbounded, | ||||
|                     Unbounded, | ||||
|                 )?; | ||||
|                 Box::new(floats.map(|entry| { | ||||
|                     entry.map(|((_, _, number, _), docids)| (Value::from(number), docids)) | ||||
|                 })) | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         let filter = self.candidates.as_ref(); | ||||
|         let mut kept = Vec::new(); | ||||
|         for entry in entries { | ||||
|             let (value, docids) = entry?; | ||||
|             // Without a filter every value is kept. | ||||
|             let is_kept = filter.map_or(true, |candidates| !docids.is_disjoint(candidates)); | ||||
|             if is_kept { | ||||
|                 kept.push(value); | ||||
|             } | ||||
|         } | ||||
|         Ok(kept) | ||||
|     } | ||||
|  | ||||
|     /// Computes the values of every selected facet, keyed by facet name. | ||||
|     /// | ||||
|     /// Selected names that are unknown to the fields ids map, or that are not | ||||
|     /// faceted fields, are silently left out of the result. | ||||
|     pub fn execute(&self) -> heed::Result<HashMap<String, Vec<Value>>> { | ||||
|         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields(self.rtxn)?; | ||||
|  | ||||
|         let selection: Vec<_> = match &self.facets { | ||||
|             Some(names) => names | ||||
|                 .iter() | ||||
|                 .filter_map(|name| { | ||||
|                     let fid = fields_ids_map.id(name)?; | ||||
|                     let ftype = *faceted_fields.get(&fid)?; | ||||
|                     Some((fid, ftype)) | ||||
|                 }) | ||||
|                 .collect(), | ||||
|             None => faceted_fields.iter().map(|(fid, ftype)| (*fid, *ftype)).collect(), | ||||
|         }; | ||||
|  | ||||
|         let mut distribution = HashMap::new(); | ||||
|         for (fid, ftype) in selection { | ||||
|             // Every id in the selection is expected to have a name in the fields ids map. | ||||
|             let name = fields_ids_map.name(fid).unwrap(); | ||||
|             let values = self.facet_values(fid, ftype)?; | ||||
|             distribution.insert(name.to_string(), values); | ||||
|         } | ||||
|  | ||||
|         Ok(distribution) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Debug output shows the facet selection and the candidates only; the | ||||
| /// transaction and the index are left out. | ||||
| impl fmt::Debug for FacetDistribution<'_> { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         let mut builder = f.debug_struct("FacetDistribution"); | ||||
|         builder.field("facets", &self.facets); | ||||
|         builder.field("candidates", &self.candidates); | ||||
|         builder.finish() | ||||
|     } | ||||
| } | ||||
| @@ -13,11 +13,13 @@ use crate::heed_codec::CboRoaringBitmapCodec; | ||||
| use crate::{Index, FieldId}; | ||||
|  | ||||
| pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet_distribution::FacetDistribution; | ||||
|  | ||||
| mod facet_condition; | ||||
| mod facet_distribution; | ||||
| mod parser; | ||||
|  | ||||
| struct FacetRange<'t, T: 't, KC> { | ||||
| pub struct FacetRange<'t, T: 't, KC> { | ||||
|     iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, | ||||
|     end: Bound<T>, | ||||
| } | ||||
| @@ -27,7 +29,7 @@ where | ||||
|     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||
|     T: PartialOrd + Copy + Bounded, | ||||
| { | ||||
|     fn new( | ||||
|     pub fn new( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         db: Database<KC, CboRoaringBitmapCodec>, | ||||
|         field_id: FieldId, | ||||
| @@ -78,7 +80,7 @@ where | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct FacetRevRange<'t, T: 't, KC> { | ||||
| pub struct FacetRevRange<'t, T: 't, KC> { | ||||
|     iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, | ||||
|     end: Bound<T>, | ||||
| } | ||||
| @@ -88,7 +90,7 @@ where | ||||
|     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||
|     T: PartialOrd + Copy + Bounded, | ||||
| { | ||||
|     fn new( | ||||
|     pub fn new( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         db: Database<KC, CboRoaringBitmapCodec>, | ||||
|         field_id: FieldId, | ||||
|   | ||||
| @@ -20,7 +20,7 @@ use crate::mdfs::Mdfs; | ||||
| use crate::query_tokens::{query_tokens, QueryToken}; | ||||
| use crate::{Index, FieldId, DocumentId, Criterion}; | ||||
|  | ||||
| pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet::{FacetIter}; | ||||
|  | ||||
| // Building these factories is not free. | ||||
| @@ -313,22 +313,26 @@ impl<'a> Search<'a> { | ||||
|                 // there are some facet conditions, we return a placeholder. | ||||
|                 let documents_ids = match order_by_facet { | ||||
|                     Some((fid, ftype, is_ascending)) => { | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)? | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)? | ||||
|                     }, | ||||
|                     None => facet_candidates.iter().take(limit).collect(), | ||||
|                 }; | ||||
|                 return Ok(SearchResult { documents_ids, ..Default::default() }) | ||||
|                 return Ok(SearchResult { | ||||
|                     documents_ids, | ||||
|                     candidates: facet_candidates, | ||||
|                     ..Default::default() | ||||
|                 }) | ||||
|             }, | ||||
|             (None, None) => { | ||||
|                 // If the query is not set or results in no DFAs we return a placeholder. | ||||
|                 let documents_ids = self.index.documents_ids(self.rtxn)?; | ||||
|                 let all_docids = self.index.documents_ids(self.rtxn)?; | ||||
|                 let documents_ids = match order_by_facet { | ||||
|                     Some((fid, ftype, is_ascending)) => { | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)? | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)? | ||||
|                     }, | ||||
|                     None => documents_ids.iter().take(limit).collect(), | ||||
|                     None => all_docids.iter().take(limit).collect(), | ||||
|                 }; | ||||
|                 return Ok(SearchResult { documents_ids, ..Default::default() }) | ||||
|                 return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() }) | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
| @@ -336,7 +340,7 @@ impl<'a> Search<'a> { | ||||
|  | ||||
|         // The mana depth first search is a revised DFS that explores | ||||
|         // solutions in the order of their proximities. | ||||
|         let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates); | ||||
|         let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone()); | ||||
|         let mut documents = Vec::new(); | ||||
|  | ||||
|         // We execute the Mdfs iterator until we find enough documents. | ||||
| @@ -364,7 +368,7 @@ impl<'a> Search<'a> { | ||||
|             None => documents.into_iter().flatten().take(limit).collect(), | ||||
|         }; | ||||
|  | ||||
|         Ok(SearchResult { found_words, documents_ids }) | ||||
|         Ok(SearchResult { found_words, candidates, documents_ids }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -383,6 +387,7 @@ impl fmt::Debug for Search<'_> { | ||||
| /// Values returned by a search. | ||||
| #[derive(Default)] | ||||
| pub struct SearchResult { | ||||
|     // NOTE(review): presumably the query words that were matched; confirm against the search code. | ||||
|     pub found_words: HashSet<String>, | ||||
|     /// Candidate documents ids of the search; the visible return sites fill it with | ||||
|     /// the facet candidates, all the documents ids, or the candidates given to the Mdfs. | ||||
|     pub candidates: RoaringBitmap, | ||||
|     // TODO: these documents ids should be associated with their criteria scores. | ||||
|     pub documents_ids: Vec<DocumentId>, | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user