mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	add tests
This commit is contained in:
		| @@ -478,13 +478,44 @@ impl Index { | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
| pub(crate) mod tests { | ||||
|     use std::ops::Deref; | ||||
|  | ||||
|     use heed::EnvOpenOptions; | ||||
|     use maplit::hashmap; | ||||
|     use tempfile::TempDir; | ||||
|  | ||||
|     use crate::Index; | ||||
|     use crate::update::{IndexDocuments, UpdateFormat}; | ||||
|  | ||||
|     pub(crate) struct TempIndex { | ||||
|         inner: Index, | ||||
|         _tempdir: TempDir, | ||||
|     } | ||||
|  | ||||
|     impl Deref for TempIndex { | ||||
|         type Target = Index; | ||||
|  | ||||
|         fn deref(&self) -> &Self::Target { | ||||
|             &self.inner | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     impl TempIndex { | ||||
|         /// Creates a temporary index, with a default `4096 * 100` size. This should be enough for | ||||
|         /// most tests. | ||||
|         pub fn new() -> Self { | ||||
|             let mut options = EnvOpenOptions::new(); | ||||
|             options.map_size(100 * 4096); | ||||
|             let _tempdir = TempDir::new_in(".").unwrap(); | ||||
|             let inner = Index::new(options, _tempdir.path()).unwrap(); | ||||
|             Self { | ||||
|                 inner, | ||||
|                 _tempdir | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn initial_fields_distribution() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|   | ||||
| @@ -6,7 +6,9 @@ use crate::heed_codec::facet::*; | ||||
| use crate::{facet::FacetType, DocumentId, FieldId, Index}; | ||||
| use super::{Distinct, DocIter}; | ||||
|  | ||||
| /// A distinct implementer that is backed by facets. On each iteration, the facet values for the | ||||
| /// A distinct implementer that is backed by facets. | ||||
| /// | ||||
| /// On each iteration, the facet values for the | ||||
| /// distinct attribute of the first document are retrieved. The document ids for these facet values | ||||
| /// are then retrieved and taken out of the the candidate and added to the excluded set. We take | ||||
| /// care to keep the document we are currently on, and remove it from the excluded list. The next | ||||
| @@ -121,7 +123,7 @@ impl<'a> FacetDistinctIter<'a> { | ||||
|     } | ||||
|  | ||||
|     /// Performs the next iteration of the facet distinct. This is a convenience method that is | ||||
|     /// called by the Iterator::next implementation that tranposes the result. It makes error | ||||
|     /// called by the Iterator::next implementation that transposes the result. It makes error | ||||
|     /// handling easier. | ||||
|     fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> { | ||||
|         // The first step is to remove all the excluded documents from our candidates | ||||
| @@ -201,3 +203,36 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> { | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::HashMap; | ||||
|  | ||||
|     use super::*; | ||||
|     use super::super::test::{generate_index, validate_distinct_candidates}; | ||||
|     use crate::facet::FacetType; | ||||
|  | ||||
|     macro_rules! test_facet_distinct { | ||||
|         ($name:ident, $distinct:literal, $facet_type:expr) => { | ||||
|             #[test] | ||||
|             fn $name() { | ||||
|                 use std::iter::FromIterator; | ||||
|  | ||||
|                 let facets = HashMap::from_iter(Some(($distinct.to_string(), $facet_type.to_string()))); | ||||
|                 let (index, fid, candidates) = generate_index($distinct, facets); | ||||
|                 let txn = index.read_txn().unwrap(); | ||||
|                 let mut map_distinct = FacetDistinct::new(fid, &index, &txn, $facet_type); | ||||
|                 let excluded = RoaringBitmap::new(); | ||||
|                 let mut iter = map_distinct.distinct(candidates.clone(), excluded); | ||||
|                 let count = validate_distinct_candidates(iter.by_ref(), fid, &index); | ||||
|                 let excluded = iter.into_excluded(); | ||||
|                 assert_eq!(count as u64 + excluded.len(), candidates.len()); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     test_facet_distinct!(test_string, "txt", FacetType::String); | ||||
|     test_facet_distinct!(test_strings, "txts", FacetType::String); | ||||
|     test_facet_distinct!(test_int, "cat-int", FacetType::Integer); | ||||
|     test_facet_distinct!(test_ints, "cat-ints", FacetType::Integer); | ||||
| } | ||||
|   | ||||
| @@ -6,7 +6,9 @@ use serde_json::Value; | ||||
| use super::{Distinct, DocIter}; | ||||
| use crate::{DocumentId, FieldId, Index}; | ||||
|  | ||||
| /// A distinct implementer that is backed by an `HashMap`. Each time a document is seen, the value | ||||
| /// A distinct implementer that is backed by an `HashMap`. | ||||
| /// | ||||
| /// Each time a document is seen, the value | ||||
| /// for its distinct field is added to the map. If the map already contains an entry for this | ||||
| /// value, then the document is filtered out, and is added to the excluded set. | ||||
| pub struct MapDistinct<'a> { | ||||
| @@ -38,7 +40,7 @@ pub struct MapDistinctIter<'a, 'b> { | ||||
|  | ||||
| impl<'a, 'b> MapDistinctIter<'a, 'b> { | ||||
|     /// Performs the next iteration of the mafacetp distinct. This is a convenience method that is | ||||
|     /// called by the Iterator::next implementation that tranposes the result. It makes error | ||||
|     /// called by the Iterator::next implementation that transposes the result. It makes error | ||||
|     /// handling easier. | ||||
|     fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> { | ||||
|         let map = &mut self.map; | ||||
| @@ -105,3 +107,32 @@ impl<'a, 'b> Distinct<'b> for MapDistinct<'a> { | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::HashMap; | ||||
|  | ||||
|     use super::*; | ||||
|     use super::super::test::{generate_index, validate_distinct_candidates}; | ||||
|  | ||||
|     macro_rules! test_map_distinct { | ||||
|         ($name:ident, $distinct:literal) => { | ||||
|             #[test] | ||||
|             fn $name() { | ||||
|                 let (index, fid, candidates) = generate_index($distinct, HashMap::new()); | ||||
|                 let txn = index.read_txn().unwrap(); | ||||
|                 let mut map_distinct = MapDistinct::new(fid, &index, &txn); | ||||
|                 let excluded = RoaringBitmap::new(); | ||||
|                 let mut iter = map_distinct.distinct(candidates.clone(), excluded); | ||||
|                 let count = validate_distinct_candidates(iter.by_ref(), fid, &index); | ||||
|                 let excluded = iter.into_excluded(); | ||||
|                 assert_eq!(count as u64 + excluded.len(), candidates.len()); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     test_map_distinct!(test_string, "txt"); | ||||
|     test_map_distinct!(test_strings, "txts"); | ||||
|     test_map_distinct!(test_int, "cat-int"); | ||||
|     test_map_distinct!(test_ints, "cat-ints"); | ||||
| } | ||||
|   | ||||
| @@ -4,14 +4,14 @@ mod noop_distinct; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::DocumentId; | ||||
| pub use facet_distinct::FacetDistinct; | ||||
| pub use map_distinct::MapDistinct; | ||||
| pub use noop_distinct::NoopDistinct; | ||||
| use crate::DocumentId; | ||||
|  | ||||
| /// A trait implemented by document interators that are returned by calls to `Distinct::distinct`. | ||||
| /// It provides a way to get back the ownership to the excluded set. | ||||
| pub trait DocIter: Iterator<Item=anyhow::Result<DocumentId>> { | ||||
| pub trait DocIter: Iterator<Item = anyhow::Result<DocumentId>> { | ||||
|     /// Returns ownership on the internal exluded set. | ||||
|     fn into_excluded(self) -> RoaringBitmap; | ||||
| } | ||||
| @@ -25,3 +25,120 @@ pub trait Distinct<'a> { | ||||
|  | ||||
|     fn distinct(&'a mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter; | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use std::collections::{HashMap, HashSet}; | ||||
|  | ||||
|     use once_cell::sync::Lazy; | ||||
|     use rand::{seq::SliceRandom, Rng}; | ||||
|     use roaring::RoaringBitmap; | ||||
|     use serde_json::{json, Value}; | ||||
|  | ||||
|     use crate::index::{Index, tests::TempIndex}; | ||||
|     use crate::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; | ||||
|     use crate::{BEU32, FieldId, DocumentId}; | ||||
|  | ||||
|     static JSON: Lazy<Value> = Lazy::new(generate_json); | ||||
|  | ||||
|     fn generate_json() -> Value { | ||||
|         let mut rng = rand::thread_rng(); | ||||
|         let num_docs = rng.gen_range(10..30); | ||||
|  | ||||
|         let mut documents = Vec::new(); | ||||
|  | ||||
|         let txts = ["toto", "titi", "tata"]; | ||||
|         let cats = (1..10).map(|i| i.to_string()).collect::<Vec<_>>(); | ||||
|         let cat_ints = (1..10).collect::<Vec<_>>(); | ||||
|  | ||||
|         for i in 0..num_docs { | ||||
|             let txt = txts.choose(&mut rng).unwrap(); | ||||
|             let mut sample_txts = cats.clone(); | ||||
|             sample_txts.shuffle(&mut rng); | ||||
|  | ||||
|             let mut sample_ints = cat_ints.clone(); | ||||
|             sample_ints.shuffle(&mut rng); | ||||
|  | ||||
|             let doc = json!({ | ||||
|                 "id": i, | ||||
|                 "txt": txt, | ||||
|                 "cat-int": rng.gen_range(0..3), | ||||
|                 "txts": sample_txts[..(rng.gen_range(0..3))], | ||||
|                 "cat-ints": sample_ints[..(rng.gen_range(0..3))], | ||||
|             }); | ||||
|             documents.push(doc); | ||||
|         } | ||||
|  | ||||
|         Value::Array(documents) | ||||
|     } | ||||
|  | ||||
|     /// Returns a temporary index populated with random test documents, the FieldId for the | ||||
|     /// distinct attribute, and the RoaringBitmap with the document ids. | ||||
|     pub(crate) fn generate_index(distinct: &str, facets: HashMap<String, String>) -> (TempIndex, FieldId, RoaringBitmap) { | ||||
|         let index = TempIndex::new(); | ||||
|         let mut txn = index.write_txn().unwrap(); | ||||
|  | ||||
|         // set distinct and faceted attributes for the index. | ||||
|         let builder = UpdateBuilder::new(0); | ||||
|         let mut update = builder.settings(&mut txn, &index); | ||||
|         update.set_distinct_attribute(distinct.to_string()); | ||||
|         if !facets.is_empty() { | ||||
|             update.set_faceted_fields(facets) | ||||
|         } | ||||
|         update.execute(|_, _| ()).unwrap(); | ||||
|  | ||||
|         // add documents to the index | ||||
|         let builder = UpdateBuilder::new(1); | ||||
|         let mut addition = builder.index_documents(&mut txn, &index); | ||||
|  | ||||
|         addition.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); | ||||
|         addition.update_format(UpdateFormat::Json); | ||||
|  | ||||
|         addition | ||||
|             .execute(JSON.to_string().as_bytes(), |_, _| ()) | ||||
|             .unwrap(); | ||||
|  | ||||
|         let fields_map = index.fields_ids_map(&txn).unwrap(); | ||||
|         let fid = fields_map.id(&distinct).unwrap(); | ||||
|  | ||||
|         let map = (0..JSON.as_array().unwrap().len() as u32).collect(); | ||||
|  | ||||
|         txn.commit().unwrap(); | ||||
|  | ||||
|         (index, fid, map) | ||||
|     } | ||||
|  | ||||
|  | ||||
|     /// Checks that all the candidates are distinct, and returns the candidates number. | ||||
|     pub(crate) fn validate_distinct_candidates( | ||||
|         candidates: impl Iterator<Item=anyhow::Result<DocumentId>>, | ||||
|         distinct: FieldId, | ||||
|         index: &Index, | ||||
|         ) -> usize { | ||||
|         fn test(seen: &mut HashSet<String>, value: &Value) { | ||||
|             match value { | ||||
|                 Value::Null | Value::Object(_) | Value::Bool(_) => (), | ||||
|                 Value::Number(_) | Value::String(_) => { | ||||
|                     let s = value.to_string(); | ||||
|                     assert!(seen.insert(s)); | ||||
|                 } | ||||
|                 Value::Array(values) => {values.into_iter().for_each(|value| test(seen, value))} | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let mut seen = HashSet::<String>::new(); | ||||
|  | ||||
|         let txn = index.read_txn().unwrap(); | ||||
|         let mut count = 0; | ||||
|         for candidate in candidates { | ||||
|             count += 1; | ||||
|             let candidate = candidate.unwrap(); | ||||
|             let id = BEU32::new(candidate); | ||||
|             let document = index.documents.get(&txn, &id).unwrap().unwrap(); | ||||
|             let value = document.get(distinct).unwrap(); | ||||
|             let value = serde_json::from_slice(value).unwrap(); | ||||
|             test(&mut seen, &value); | ||||
|         } | ||||
|         count | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -3,8 +3,8 @@ use roaring::{RoaringBitmap, bitmap::IntoIter}; | ||||
| use crate::DocumentId; | ||||
| use super::{DocIter, Distinct}; | ||||
|  | ||||
| /// A distinct implementer that does not perform any distinct, and simply returns an iterator to | ||||
| /// the candidates. | ||||
| /// A distinct implementer that does not perform any distinct, | ||||
| /// and simply returns an iterator to the candidates. | ||||
| pub struct NoopDistinct; | ||||
|  | ||||
| pub struct NoopDistinctIter { | ||||
| @@ -36,3 +36,22 @@ impl Distinct<'_> for NoopDistinct { | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod test { | ||||
|     use super::*; | ||||
|  | ||||
|     #[test] | ||||
|     fn test_noop() { | ||||
|         let candidates = (1..10).collect(); | ||||
|         let excluded = RoaringBitmap::new(); | ||||
|         let mut iter = NoopDistinct.distinct(candidates, excluded); | ||||
|         assert_eq!( | ||||
|             iter.by_ref().map(Result::unwrap).collect::<Vec<_>>(), | ||||
|             (1..10).collect::<Vec<_>>() | ||||
|         ); | ||||
|  | ||||
|         let excluded = iter.into_excluded(); | ||||
|         assert!(excluded.is_empty()); | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user