mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Remove the MapDistinct struct as now distinct attributes are faceted
This commit is contained in:
		| @@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> { | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::HashSet; | ||||
|  | ||||
|     use super::super::test::{generate_index, validate_distinct_candidates}; | ||||
|     use super::*; | ||||
|  | ||||
| @@ -198,10 +196,7 @@ mod test { | ||||
|         ($name:ident, $distinct:literal) => { | ||||
|             #[test] | ||||
|             fn $name() { | ||||
|                 use std::iter::FromIterator; | ||||
|  | ||||
|                 let facets = HashSet::from_iter(Some(($distinct.to_string()))); | ||||
|                 let (index, fid, candidates) = generate_index($distinct, facets); | ||||
|                 let (index, fid, candidates) = generate_index($distinct); | ||||
|                 let txn = index.read_txn().unwrap(); | ||||
|                 let mut map_distinct = FacetDistinct::new(fid, &index, &txn); | ||||
|                 let excluded = RoaringBitmap::new(); | ||||
|   | ||||
| @@ -1,138 +0,0 @@ | ||||
| use std::collections::HashMap; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
| use serde_json::Value; | ||||
|  | ||||
| use super::{Distinct, DocIter}; | ||||
| use crate::{DocumentId, FieldId, Index}; | ||||
|  | ||||
| /// A distinct implementer that is backed by an `HashMap`. | ||||
| /// | ||||
| /// Each time a document is seen, the value | ||||
| /// for its distinct field is added to the map. If the map already contains an entry for this | ||||
| /// value, then the document is filtered out, and is added to the excluded set. | ||||
| pub struct MapDistinct<'a> { | ||||
|     distinct: FieldId, | ||||
|     map: HashMap<String, usize>, | ||||
|     index: &'a Index, | ||||
|     txn: &'a heed::RoTxn<'a>, | ||||
| } | ||||
|  | ||||
| impl<'a> MapDistinct<'a> { | ||||
|     pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self { | ||||
|         Self { | ||||
|             distinct, | ||||
|             map: HashMap::new(), | ||||
|             index, | ||||
|             txn, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub struct MapDistinctIter<'a, 'b> { | ||||
|     distinct: FieldId, | ||||
|     map: &'b mut HashMap<String, usize>, | ||||
|     index: &'a Index, | ||||
|     txn: &'a heed::RoTxn<'a>, | ||||
|     candidates: roaring::bitmap::IntoIter, | ||||
|     excluded: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> MapDistinctIter<'a, 'b> { | ||||
|     /// Performs the next iteration of the mafacetp distinct. This is a convenience method that is | ||||
|     /// called by the Iterator::next implementation that transposes the result. It makes error | ||||
|     /// handling easier. | ||||
|     fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> { | ||||
|         let map = &mut self.map; | ||||
|         let mut filter = |value: Value| { | ||||
|             let entry = map.entry(value.to_string()).or_insert(0); | ||||
|             *entry += 1; | ||||
|             *entry <= 1 | ||||
|         }; | ||||
|  | ||||
|         while let Some(id) = self.candidates.next() { | ||||
|             let document = self.index.documents(&self.txn, Some(id))?[0].1; | ||||
|             let value = document | ||||
|                 .get(self.distinct) | ||||
|                 .map(serde_json::from_slice::<Value>) | ||||
|                 .transpose()?; | ||||
|  | ||||
|             let accept = match value { | ||||
|                 Some(Value::Array(values)) => { | ||||
|                     let mut accept = true; | ||||
|                     for value in values { | ||||
|                         accept &= filter(value); | ||||
|                     } | ||||
|                     accept | ||||
|                 } | ||||
|                 Some(Value::Null) | Some(Value::Object(_)) | None => true, | ||||
|                 Some(value) => filter(value), | ||||
|             }; | ||||
|  | ||||
|             if accept { | ||||
|                 return Ok(Some(id)); | ||||
|             } else { | ||||
|                 self.excluded.insert(id); | ||||
|             } | ||||
|         } | ||||
|         Ok(None) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl Iterator for MapDistinctIter<'_, '_> { | ||||
|     type Item = anyhow::Result<DocumentId>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         self.next_inner().transpose() | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl DocIter for MapDistinctIter<'_, '_> { | ||||
|     fn into_excluded(self) -> RoaringBitmap { | ||||
|         self.excluded | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'a, 'b> Distinct<'b> for MapDistinct<'a> { | ||||
|     type Iter = MapDistinctIter<'a, 'b>; | ||||
|  | ||||
|     fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { | ||||
|         MapDistinctIter { | ||||
|             distinct: self.distinct, | ||||
|             map: &mut self.map, | ||||
|             index: &self.index, | ||||
|             txn: &self.txn, | ||||
|             candidates: candidates.into_iter(), | ||||
|             excluded, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::HashSet; | ||||
|  | ||||
|     use super::*; | ||||
|     use super::super::test::{generate_index, validate_distinct_candidates}; | ||||
|  | ||||
|     macro_rules! test_map_distinct { | ||||
|         ($name:ident, $distinct:literal) => { | ||||
|             #[test] | ||||
|             fn $name() { | ||||
|                 let (index, fid, candidates) = generate_index($distinct, HashSet::new()); | ||||
|                 let txn = index.read_txn().unwrap(); | ||||
|                 let mut map_distinct = MapDistinct::new(fid, &index, &txn); | ||||
|                 let excluded = RoaringBitmap::new(); | ||||
|                 let mut iter = map_distinct.distinct(candidates.clone(), excluded); | ||||
|                 let count = validate_distinct_candidates(iter.by_ref(), fid, &index); | ||||
|                 let excluded = iter.into_excluded(); | ||||
|                 assert_eq!(count as u64 + excluded.len(), candidates.len()); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     test_map_distinct!(test_string, "txt"); | ||||
|     test_map_distinct!(test_strings, "txts"); | ||||
|     test_map_distinct!(test_int, "cat-int"); | ||||
|     test_map_distinct!(test_ints, "cat-ints"); | ||||
| } | ||||
| @@ -1,12 +1,10 @@ | ||||
| mod facet_distinct; | ||||
| mod map_distinct; | ||||
| mod noop_distinct; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::DocumentId; | ||||
| pub use facet_distinct::FacetDistinct; | ||||
| pub use map_distinct::MapDistinct; | ||||
| pub use noop_distinct::NoopDistinct; | ||||
|  | ||||
| /// A trait implemented by document interators that are returned by calls to `Distinct::distinct`. | ||||
| @@ -74,7 +72,7 @@ mod test { | ||||
|  | ||||
|     /// Returns a temporary index populated with random test documents, the FieldId for the | ||||
|     /// distinct attribute, and the RoaringBitmap with the document ids. | ||||
|     pub(crate) fn generate_index(distinct: &str, facets: HashSet<String>) -> (TempIndex, FieldId, RoaringBitmap) { | ||||
|     pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) { | ||||
|         let index = TempIndex::new(); | ||||
|         let mut txn = index.write_txn().unwrap(); | ||||
|  | ||||
| @@ -82,9 +80,6 @@ mod test { | ||||
|         let builder = UpdateBuilder::new(0); | ||||
|         let mut update = builder.settings(&mut txn, &index); | ||||
|         update.set_distinct_attribute(distinct.to_string()); | ||||
|         if !facets.is_empty() { | ||||
|             update.set_filterable_fields(facets) | ||||
|         } | ||||
|         update.execute(|_, _| ()).unwrap(); | ||||
|  | ||||
|         // add documents to the index | ||||
|   | ||||
| @@ -12,7 +12,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; | ||||
| use once_cell::sync::Lazy; | ||||
| use roaring::bitmap::RoaringBitmap; | ||||
|  | ||||
| use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct}; | ||||
| use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct}; | ||||
| use crate::search::criteria::r#final::{Final, FinalResult}; | ||||
| use crate::{Index, DocumentId}; | ||||
|  | ||||
| @@ -141,14 +141,8 @@ impl<'a> Search<'a> { | ||||
|             Some(name) => { | ||||
|                 let field_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|                 let id = field_ids_map.id(name).expect("distinct not present in field map"); | ||||
|                 let filterable_fields = self.index.filterable_fields(self.rtxn)?; | ||||
|                 if filterable_fields.contains(name) { | ||||
|                     let distinct = FacetDistinct::new(id, self.index, self.rtxn); | ||||
|                     self.perform_sort(distinct, matching_words, criteria) | ||||
|                 } else { | ||||
|                     let distinct = MapDistinct::new(id, self.index, self.rtxn); | ||||
|                     self.perform_sort(distinct, matching_words, criteria) | ||||
|                 } | ||||
|                 let distinct = FacetDistinct::new(id, self.index, self.rtxn); | ||||
|                 self.perform_sort(distinct, matching_words, criteria) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user