mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Remove max values by facet limit for facet distribution
This commit is contained in:
		| @@ -1,6 +1,6 @@ | ||||
| use std::collections::{BTreeMap, HashSet}; | ||||
| use std::ops::Bound::Unbounded; | ||||
| use std::{cmp, fmt, mem}; | ||||
| use std::{fmt, mem}; | ||||
|  | ||||
| use heed::types::ByteSlice; | ||||
| use roaring::RoaringBitmap; | ||||
| @@ -13,14 +13,6 @@ use crate::heed_codec::facet::{ | ||||
| use crate::search::facet::{FacetNumberIter, FacetNumberRange, FacetStringIter}; | ||||
| use crate::{FieldId, Index, Result}; | ||||
|  | ||||
| /// The default number of values by facets that will | ||||
| /// be fetched from the key-value store. | ||||
| const DEFAULT_VALUES_BY_FACET: usize = 100; | ||||
|  | ||||
| /// The hard limit in the number of values by facets that will be fetched from | ||||
| /// the key-value store. Searching for more values could slow down the engine. | ||||
| const MAX_VALUES_BY_FACET: usize = 1000; | ||||
|  | ||||
| /// Threshold on the number of candidates that will make | ||||
| /// the system to choose between one algorithm or another. | ||||
| const CANDIDATES_THRESHOLD: u64 = 3000; | ||||
| @@ -28,20 +20,13 @@ const CANDIDATES_THRESHOLD: u64 = 3000; | ||||
| pub struct FacetDistribution<'a> { | ||||
|     facets: Option<HashSet<String>>, | ||||
|     candidates: Option<RoaringBitmap>, | ||||
|     max_values_by_facet: usize, | ||||
|     rtxn: &'a heed::RoTxn<'a>, | ||||
|     index: &'a Index, | ||||
| } | ||||
|  | ||||
| impl<'a> FacetDistribution<'a> { | ||||
|     pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> { | ||||
|         FacetDistribution { | ||||
|             facets: None, | ||||
|             candidates: None, | ||||
|             max_values_by_facet: DEFAULT_VALUES_BY_FACET, | ||||
|             rtxn, | ||||
|             index, | ||||
|         } | ||||
|         FacetDistribution { facets: None, candidates: None, rtxn, index } | ||||
|     } | ||||
|  | ||||
|     pub fn facets<I: IntoIterator<Item = A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self { | ||||
| @@ -54,11 +39,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self { | ||||
|         self.max_values_by_facet = cmp::min(max, MAX_VALUES_BY_FACET); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// There is a small amount of candidates OR we ask for facet string values so we | ||||
|     /// decide to iterate over the facet values of each one of them, one by one. | ||||
|     fn facet_distribution_from_documents( | ||||
| @@ -72,7 +52,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|             FacetType::Number => { | ||||
|                 let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect(); | ||||
|  | ||||
|                 let distribution_prelength = distribution.len(); | ||||
|                 let db = self.index.field_id_docid_facet_f64s; | ||||
|                 for docid in candidates.into_iter() { | ||||
|                     key_buffer.truncate(mem::size_of::<FieldId>()); | ||||
| @@ -85,9 +64,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|                     for result in iter { | ||||
|                         let ((_, _, value), ()) = result?; | ||||
|                         *distribution.entry(value.to_string()).or_insert(0) += 1; | ||||
|                         if distribution.len() - distribution_prelength == self.max_values_by_facet { | ||||
|                             break; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
| @@ -110,10 +86,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|                             .entry(normalized_value) | ||||
|                             .or_insert_with(|| (original_value, 0)); | ||||
|                         *count += 1; | ||||
|  | ||||
|                         if normalized_distribution.len() == self.max_values_by_facet { | ||||
|                             break; | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
| @@ -144,9 +116,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|             if !docids.is_empty() { | ||||
|                 distribution.insert(value.to_string(), docids.len()); | ||||
|             } | ||||
|             if distribution.len() == self.max_values_by_facet { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
| @@ -167,9 +136,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|             if !docids.is_empty() { | ||||
|                 distribution.insert(original.to_string(), docids.len()); | ||||
|             } | ||||
|             if distribution.len() == self.max_values_by_facet { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(()) | ||||
| @@ -189,9 +155,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|         for result in range { | ||||
|             let ((_, _, value, _), docids) = result?; | ||||
|             distribution.insert(value.to_string(), docids.len()); | ||||
|             if distribution.len() == self.max_values_by_facet { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let iter = self | ||||
| @@ -205,9 +168,6 @@ impl<'a> FacetDistribution<'a> { | ||||
|         for result in iter { | ||||
|             let ((_, normalized_value), (original_value, docids)) = result?; | ||||
|             normalized_distribution.insert(normalized_value, (original_value, docids.len())); | ||||
|             if distribution.len() == self.max_values_by_facet { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let iter = normalized_distribution | ||||
| @@ -289,12 +249,11 @@ impl<'a> FacetDistribution<'a> { | ||||
|  | ||||
| impl fmt::Debug for FacetDistribution<'_> { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self; | ||||
|         let FacetDistribution { facets, candidates, rtxn: _, index: _ } = self; | ||||
|  | ||||
|         f.debug_struct("FacetDistribution") | ||||
|             .field("facets", facets) | ||||
|             .field("candidates", candidates) | ||||
|             .field("max_values_by_facet", max_values_by_facet) | ||||
|             .finish() | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user