mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 13:36:27 +00:00 
			
		
		
		
	Clean and make the facet order configurable internally
This commit is contained in:
		
				
					committed by
					
						 Clément Renault
						Clément Renault
					
				
			
			
				
	
			
			
			
						parent
						
							f42bef2f66
						
					
				
				
					commit
					80bbd4b6f3
				
			| @@ -9,12 +9,14 @@ use roaring::RoaringBitmap; | |||||||
| use crate::error::UserError; | use crate::error::UserError; | ||||||
| use crate::facet::FacetType; | use crate::facet::FacetType; | ||||||
| use crate::heed_codec::facet::{ | use crate::heed_codec::facet::{ | ||||||
|     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, |     FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, | ||||||
|     OrderedF64Codec, |  | ||||||
| }; | }; | ||||||
| use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; | use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec}; | ||||||
| use crate::search::facet::facet_distribution_iter; | use crate::search::facet::facet_distribution_iter; | ||||||
| use crate::{FieldId, Index, Result}; | use crate::{FieldId, Index, Result}; | ||||||
|  | use facet_distribution_iter::{ | ||||||
|  |     count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, | ||||||
|  | }; | ||||||
|  |  | ||||||
| /// The default number of values by facets that will | /// The default number of values by facets that will | ||||||
| /// be fetched from the key-value store. | /// be fetched from the key-value store. | ||||||
| @@ -24,10 +26,20 @@ pub const DEFAULT_VALUES_PER_FACET: usize = 100; | |||||||
| /// the system to choose between one algorithm or another. | /// the system to choose between one algorithm or another. | ||||||
| const CANDIDATES_THRESHOLD: u64 = 3000; | const CANDIDATES_THRESHOLD: u64 = 3000; | ||||||
|  |  | ||||||
|  | /// How should we fetch the facets? | ||||||
|  | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||||||
|  | pub enum OrderBy { | ||||||
|  |     /// By lexicographic order... | ||||||
|  |     Lexicographic, | ||||||
|  |     /// Or by number of docids in common? | ||||||
|  |     Count, | ||||||
|  | } | ||||||
|  |  | ||||||
| pub struct FacetDistribution<'a> { | pub struct FacetDistribution<'a> { | ||||||
|     facets: Option<HashSet<String>>, |     facets: Option<HashSet<String>>, | ||||||
|     candidates: Option<RoaringBitmap>, |     candidates: Option<RoaringBitmap>, | ||||||
|     max_values_per_facet: usize, |     max_values_per_facet: usize, | ||||||
|  |     order_by: OrderBy, | ||||||
|     rtxn: &'a heed::RoTxn<'a>, |     rtxn: &'a heed::RoTxn<'a>, | ||||||
|     index: &'a Index, |     index: &'a Index, | ||||||
| } | } | ||||||
| @@ -38,6 +50,7 @@ impl<'a> FacetDistribution<'a> { | |||||||
|             facets: None, |             facets: None, | ||||||
|             candidates: None, |             candidates: None, | ||||||
|             max_values_per_facet: DEFAULT_VALUES_PER_FACET, |             max_values_per_facet: DEFAULT_VALUES_PER_FACET, | ||||||
|  |             order_by: OrderBy::Count, | ||||||
|             rtxn, |             rtxn, | ||||||
|             index, |             index, | ||||||
|         } |         } | ||||||
| @@ -53,6 +66,11 @@ impl<'a> FacetDistribution<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn order_by(&mut self, order_by: OrderBy) -> &mut Self { | ||||||
|  |         self.order_by = order_by; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self { |     pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self { | ||||||
|         self.candidates = Some(candidates); |         self.candidates = Some(candidates); | ||||||
|         self |         self | ||||||
| @@ -134,9 +152,15 @@ impl<'a> FacetDistribution<'a> { | |||||||
|         &self, |         &self, | ||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|         candidates: &RoaringBitmap, |         candidates: &RoaringBitmap, | ||||||
|  |         order_by: OrderBy, | ||||||
|         distribution: &mut BTreeMap<String, u64>, |         distribution: &mut BTreeMap<String, u64>, | ||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         facet_distribution_iter::lexicographically_iterate_over_facet_distribution( |         let search_function = match order_by { | ||||||
|  |             OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution, | ||||||
|  |             OrderBy::Count => count_iterate_over_facet_distribution, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         search_function( | ||||||
|             self.rtxn, |             self.rtxn, | ||||||
|             self.index |             self.index | ||||||
|                 .facet_id_f64_docids |                 .facet_id_f64_docids | ||||||
| @@ -159,9 +183,15 @@ impl<'a> FacetDistribution<'a> { | |||||||
|         &self, |         &self, | ||||||
|         field_id: FieldId, |         field_id: FieldId, | ||||||
|         candidates: &RoaringBitmap, |         candidates: &RoaringBitmap, | ||||||
|  |         order_by: OrderBy, | ||||||
|         distribution: &mut BTreeMap<String, u64>, |         distribution: &mut BTreeMap<String, u64>, | ||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         facet_distribution_iter::lexicographically_iterate_over_facet_distribution( |         let search_function = match order_by { | ||||||
|  |             OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution, | ||||||
|  |             OrderBy::Count => count_iterate_over_facet_distribution, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         search_function( | ||||||
|             self.rtxn, |             self.rtxn, | ||||||
|             self.index |             self.index | ||||||
|                 .facet_id_string_docids |                 .facet_id_string_docids | ||||||
| @@ -189,98 +219,42 @@ impl<'a> FacetDistribution<'a> { | |||||||
|         ) |         ) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     /// Placeholder search, a.k.a. no candidates were specified. We iterate throught the |  | ||||||
|     /// facet values one by one and iterate on the facet level 0 for numbers. |  | ||||||
|     fn facet_values_from_raw_facet_database( |  | ||||||
|         &self, |  | ||||||
|         field_id: FieldId, |  | ||||||
|     ) -> heed::Result<BTreeMap<String, u64>> { |  | ||||||
|         let mut distribution = BTreeMap::new(); |  | ||||||
|  |  | ||||||
|         let db = self.index.facet_id_f64_docids; |  | ||||||
|         let mut prefix = vec![]; |  | ||||||
|         prefix.extend_from_slice(&field_id.to_be_bytes()); |  | ||||||
|         prefix.push(0); // read values from level 0 only |  | ||||||
|  |  | ||||||
|         let iter = db |  | ||||||
|             .as_polymorph() |  | ||||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? |  | ||||||
|             .remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>(); |  | ||||||
|  |  | ||||||
|         for result in iter { |  | ||||||
|             let (key, value) = result?; |  | ||||||
|             distribution.insert(key.left_bound.to_string(), value.bitmap.len()); |  | ||||||
|             if distribution.len() == self.max_values_per_facet { |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         let iter = self |  | ||||||
|             .index |  | ||||||
|             .facet_id_string_docids |  | ||||||
|             .as_polymorph() |  | ||||||
|             .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? |  | ||||||
|             .remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>(); |  | ||||||
|  |  | ||||||
|         for result in iter { |  | ||||||
|             let (key, value) = result?; |  | ||||||
|  |  | ||||||
|             let docid = value.bitmap.iter().next().unwrap(); |  | ||||||
|             let key: (FieldId, _, &'a str) = (field_id, docid, key.left_bound); |  | ||||||
|             let original_string = |  | ||||||
|                 self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned(); |  | ||||||
|  |  | ||||||
|             distribution.insert(original_string, value.bitmap.len()); |  | ||||||
|             if distribution.len() == self.max_values_per_facet { |  | ||||||
|                 break; |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         Ok(distribution) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> { |     fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> { | ||||||
|         // use FacetType::{Number, String}; |         use FacetType::{Number, String}; | ||||||
|  |  | ||||||
|         let candidates = match self.candidates.as_ref() { |  | ||||||
|             Some(candidates) => candidates.clone(), |  | ||||||
|             None => todo!("fetch candidates"), |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let mut distribution = BTreeMap::new(); |         let mut distribution = BTreeMap::new(); | ||||||
|  |         match (self.order_by, &self.candidates) { | ||||||
|         let number_distribution = facet_distribution_iter::count_iterate_over_facet_distribution( |             (OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => { | ||||||
|             self.rtxn, |                 // Classic search, candidates were specified, we must return facet values only related | ||||||
|             self.index |                 // to those candidates. We also enter here for facet strings for performance reasons. | ||||||
|                 .facet_id_f64_docids |                 self.facet_distribution_from_documents(field_id, Number, cnd, &mut distribution)?; | ||||||
|                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), |                 self.facet_distribution_from_documents(field_id, String, cnd, &mut distribution)?; | ||||||
|             field_id, |             } | ||||||
|             &candidates, |             _ => { | ||||||
|         )?; |                 let universe; | ||||||
|  |                 let candidates; | ||||||
|         for (count, facet_key, _) in number_distribution { |                 match &self.candidates { | ||||||
|             let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap(); |                     Some(cnd) => candidates = cnd, | ||||||
|             distribution.insert(facet_key.to_string(), count); |                     None => { | ||||||
|  |                         universe = self.index.documents_ids(self.rtxn)?; | ||||||
|  |                         candidates = &universe; | ||||||
|  |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|         let string_distribution = facet_distribution_iter::count_iterate_over_facet_distribution( |                 self.facet_numbers_distribution_from_facet_levels( | ||||||
|             self.rtxn, |  | ||||||
|             self.index |  | ||||||
|                 .facet_id_string_docids |  | ||||||
|                 .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), |  | ||||||
|                     field_id, |                     field_id, | ||||||
|             &candidates, |                     candidates, | ||||||
|  |                     self.order_by, | ||||||
|  |                     &mut distribution, | ||||||
|  |                 )?; | ||||||
|  |                 self.facet_strings_distribution_from_facet_levels( | ||||||
|  |                     field_id, | ||||||
|  |                     candidates, | ||||||
|  |                     self.order_by, | ||||||
|  |                     &mut distribution, | ||||||
|                 )?; |                 )?; | ||||||
|  |  | ||||||
|         for (count, facet_key, any_docid) in string_distribution { |  | ||||||
|             let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap(); |  | ||||||
|  |  | ||||||
|             let key: (FieldId, _, &str) = (field_id, any_docid, facet_key); |  | ||||||
|             let original_string = |  | ||||||
|                 self.index.field_id_docid_facet_strings.get(self.rtxn, &key)?.unwrap().to_owned(); |  | ||||||
|  |  | ||||||
|             distribution.insert(original_string, count); |  | ||||||
|             } |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|         Ok(distribution) |         Ok(distribution) | ||||||
|     } |     } | ||||||
| @@ -381,13 +355,20 @@ impl<'a> FacetDistribution<'a> { | |||||||
|  |  | ||||||
| impl fmt::Debug for FacetDistribution<'_> { | impl fmt::Debug for FacetDistribution<'_> { | ||||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||
|         let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } = |         let FacetDistribution { | ||||||
|             self; |             facets, | ||||||
|  |             candidates, | ||||||
|  |             max_values_per_facet, | ||||||
|  |             order_by, | ||||||
|  |             rtxn: _, | ||||||
|  |             index: _, | ||||||
|  |         } = self; | ||||||
|  |  | ||||||
|         f.debug_struct("FacetDistribution") |         f.debug_struct("FacetDistribution") | ||||||
|             .field("facets", facets) |             .field("facets", facets) | ||||||
|             .field("candidates", candidates) |             .field("candidates", candidates) | ||||||
|             .field("max_values_per_facet", max_values_per_facet) |             .field("max_values_per_facet", max_values_per_facet) | ||||||
|  |             .field("order_by", order_by) | ||||||
|             .finish() |             .finish() | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -46,12 +46,16 @@ where | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn count_iterate_over_facet_distribution<'t>( | pub fn count_iterate_over_facet_distribution<'t, CB>( | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
|     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, |     db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, | ||||||
|     field_id: u16, |     field_id: u16, | ||||||
|     candidates: &RoaringBitmap, |     candidates: &RoaringBitmap, | ||||||
| ) -> Result<Vec<(u64, &'t [u8], u32)>> { |     mut callback: CB, | ||||||
|  | ) -> Result<()> | ||||||
|  | where | ||||||
|  |     CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>, | ||||||
|  | { | ||||||
|     #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)] |     #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)] | ||||||
|     struct LevelEntry<'t> { |     struct LevelEntry<'t> { | ||||||
|         /// The number of candidates in this entry. |         /// The number of candidates in this entry. | ||||||
| @@ -68,8 +72,6 @@ pub fn count_iterate_over_facet_distribution<'t>( | |||||||
|  |  | ||||||
|     // Represents the list of keys that we must explore. |     // Represents the list of keys that we must explore. | ||||||
|     let mut heap = BinaryHeap::new(); |     let mut heap = BinaryHeap::new(); | ||||||
|     let mut results = Vec::new(); |  | ||||||
|  |  | ||||||
|     let highest_level = get_highest_level( |     let highest_level = get_highest_level( | ||||||
|         rtxn, |         rtxn, | ||||||
|         db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), |         db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), | ||||||
| @@ -103,10 +105,9 @@ pub fn count_iterate_over_facet_distribution<'t>( | |||||||
|         while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop() |         while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop() | ||||||
|         { |         { | ||||||
|             if let Reverse(0) = level { |             if let Reverse(0) = level { | ||||||
|                 results.push((count, left_bound, any_docid)); |                 match (callback)(left_bound, count, any_docid)? { | ||||||
|                 // TODO better just call the user callback and ask for a ControlFlow |                     ControlFlow::Continue(_) => (), | ||||||
|                 if results.len() == 20 { |                     ControlFlow::Break(_) => return Ok(()), | ||||||
|                     break; |  | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|                 let starting_key = |                 let starting_key = | ||||||
| @@ -132,11 +133,9 @@ pub fn count_iterate_over_facet_distribution<'t>( | |||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         Ok(results) |  | ||||||
|     } else { |  | ||||||
|         Ok(Default::default()) |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     Ok(()) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Iterate over the facets values by lexicographic order. | /// Iterate over the facets values by lexicographic order. | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user