mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	feat: Introduce the Criterion trait
This commit is contained in:
		| @@ -2,6 +2,7 @@ use std::cmp::Ordering; | ||||
| use group_by::GroupBy; | ||||
| use crate::Match; | ||||
| use crate::rank::{match_query_index, Document}; | ||||
| use crate::rank::criterion::Criterion; | ||||
|  | ||||
| #[inline] | ||||
| fn contains_exact(matches: &[Match]) -> bool { | ||||
| @@ -13,10 +14,14 @@ fn number_exact_matches(matches: &[Match]) -> usize { | ||||
|     GroupBy::new(matches, match_query_index).map(contains_exact).count() | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn exact(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     let lhs = number_exact_matches(&lhs.matches); | ||||
|     let rhs = number_exact_matches(&rhs.matches); | ||||
| /// Ranking criterion based on the `number_exact_matches` value of a document's matches. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct Exact; | ||||
|  | ||||
|     lhs.cmp(&rhs).reverse() | ||||
| impl Criterion for Exact { | ||||
|     // Compares the `number_exact_matches` value computed from each document's matches. | ||||
|     // The result is reversed, so the document with the larger value is ordered first. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         let lhs = number_exact_matches(&lhs.matches); | ||||
|         let rhs = number_exact_matches(&rhs.matches); | ||||
|  | ||||
|         lhs.cmp(&rhs).reverse() | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -7,65 +7,64 @@ mod exact; | ||||
|  | ||||
| use std::vec; | ||||
| use std::cmp::Ordering; | ||||
| use std::ops::Deref; | ||||
| use crate::rank::Document; | ||||
|  | ||||
| pub use self::{ | ||||
|     sum_of_typos::sum_of_typos, | ||||
|     number_of_words::number_of_words, | ||||
|     words_proximity::words_proximity, | ||||
|     sum_of_words_attribute::sum_of_words_attribute, | ||||
|     sum_of_words_position::sum_of_words_position, | ||||
|     exact::exact, | ||||
|     sum_of_typos::SumOfTypos, | ||||
|     number_of_words::NumberOfWords, | ||||
|     words_proximity::WordsProximity, | ||||
|     sum_of_words_attribute::SumOfWordsAttribute, | ||||
|     sum_of_words_position::SumOfWordsPosition, | ||||
|     exact::Exact, | ||||
| }; | ||||
|  | ||||
| #[inline] | ||||
| pub fn document_id(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     lhs.id.cmp(&rhs.id) | ||||
| } | ||||
| pub trait Criterion { | ||||
|     #[inline] | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering; | ||||
|  | ||||
| #[derive(Debug)] | ||||
| pub struct Criteria<F>(Vec<F>); | ||||
|  | ||||
| impl<F> Criteria<F> { | ||||
|     pub fn new() -> Self { | ||||
|         Criteria(Vec::new()) | ||||
|     } | ||||
|  | ||||
|     pub fn with_capacity(cap: usize) -> Self { | ||||
|         Criteria(Vec::with_capacity(cap)) | ||||
|     } | ||||
|  | ||||
|     pub fn push(&mut self, criterion: F) { | ||||
|         self.0.push(criterion) | ||||
|     } | ||||
|  | ||||
|     pub fn add(mut self, criterion: F) -> Self { | ||||
|         self.push(criterion); | ||||
|         self | ||||
|     #[inline] | ||||
|     fn eq(&self, lhs: &Document, rhs: &Document) -> bool { | ||||
|         self.evaluate(lhs, rhs) == Ordering::Equal | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<F> IntoIterator for Criteria<F> { | ||||
|     type Item = F; | ||||
|     type IntoIter = vec::IntoIter<Self::Item>; | ||||
| impl<'a, T: Criterion + ?Sized> Criterion for &'a T { | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         self.deref().evaluate(lhs, rhs) | ||||
|     } | ||||
|  | ||||
|     fn into_iter(self) -> Self::IntoIter { | ||||
|         self.0.into_iter() | ||||
|     fn eq(&self, lhs: &Document, rhs: &Document) -> bool { | ||||
|         self.deref().eq(lhs, rhs) | ||||
|     } | ||||
| } | ||||
|  | ||||
| pub fn default() -> Criteria<impl Fn(&Document, &Document) -> Ordering + Copy> { | ||||
|     let functions = &[ | ||||
|         sum_of_typos, | ||||
|         number_of_words, | ||||
|         words_proximity, | ||||
|         sum_of_words_attribute, | ||||
|         sum_of_words_position, | ||||
|         exact, | ||||
|         document_id, | ||||
|     ]; | ||||
| impl<T: Criterion + ?Sized> Criterion for Box<T> { | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         self.deref().evaluate(lhs, rhs) | ||||
|     } | ||||
|  | ||||
|     let mut criteria = Criteria::with_capacity(functions.len()); | ||||
|     for f in functions { criteria.push(f) } | ||||
|     criteria | ||||
|     fn eq(&self, lhs: &Document, rhs: &Document) -> bool { | ||||
|         self.deref().eq(lhs, rhs) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Criterion that orders documents by their `id` field. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct DocumentId; | ||||
|  | ||||
| impl Criterion for DocumentId { | ||||
|     // Plain ascending comparison of the two ids; it ignores the matches entirely. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         lhs.id.cmp(&rhs.id) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Returns the default criteria as boxed trait objects, in the order they are meant to be applied. | ||||
| /// | ||||
| /// `DocumentId` comes last so that documents which compare equal on every other | ||||
| /// criterion still end up in a deterministic order. | ||||
| pub fn default() -> Vec<Box<dyn Criterion>> { | ||||
|     vec![ | ||||
|         Box::new(SumOfTypos), | ||||
|         Box::new(NumberOfWords), | ||||
|         Box::new(WordsProximity), | ||||
|         Box::new(SumOfWordsAttribute), | ||||
|         Box::new(SumOfWordsPosition), | ||||
|         Box::new(Exact), | ||||
|         Box::new(DocumentId), | ||||
|     ] | ||||
| } | ||||
|   | ||||
| @@ -2,16 +2,21 @@ use std::cmp::Ordering; | ||||
| use group_by::GroupBy; | ||||
| use crate::Match; | ||||
| use crate::rank::{match_query_index, Document}; | ||||
| use crate::rank::criterion::Criterion; | ||||
|  | ||||
| #[inline] | ||||
| fn number_of_query_words(matches: &[Match]) -> usize { | ||||
|     GroupBy::new(matches, match_query_index).count() | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn number_of_words(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     let lhs = number_of_query_words(&lhs.matches); | ||||
|     let rhs = number_of_query_words(&rhs.matches); | ||||
| /// Ranking criterion based on how many query-index groups a document's matches form. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct NumberOfWords; | ||||
|  | ||||
|     lhs.cmp(&rhs).reverse() | ||||
| impl Criterion for NumberOfWords { | ||||
|     // Compares the `number_of_query_words` count of each document's matches. | ||||
|     // The result is reversed, so the document with the larger count is ordered first. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         let lhs = number_of_query_words(&lhs.matches); | ||||
|         let rhs = number_of_query_words(&rhs.matches); | ||||
|  | ||||
|         lhs.cmp(&rhs).reverse() | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::cmp::Ordering; | ||||
| use group_by::GroupBy; | ||||
| use crate::Match; | ||||
| use crate::rank::{match_query_index, Document}; | ||||
| use crate::rank::criterion::Criterion; | ||||
|  | ||||
| #[inline] | ||||
| fn sum_matches_typos(matches: &[Match]) -> i8 { | ||||
| @@ -18,14 +19,19 @@ fn sum_matches_typos(matches: &[Match]) -> i8 { | ||||
|     sum_typos - number_words | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn sum_of_typos(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     let lhs = sum_matches_typos(&lhs.matches); | ||||
|     let rhs = sum_matches_typos(&rhs.matches); | ||||
| /// Ranking criterion based on the `sum_matches_typos` value of a document's matches. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct SumOfTypos; | ||||
|  | ||||
|     lhs.cmp(&rhs) | ||||
| impl Criterion for SumOfTypos { | ||||
|     // Compares the `sum_matches_typos` value of each document's matches in ascending | ||||
|     // order: the document with the smaller value is ordered first. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         let lhs = sum_matches_typos(&lhs.matches); | ||||
|         let rhs = sum_matches_typos(&rhs.matches); | ||||
|  | ||||
|         lhs.cmp(&rhs) | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
| @@ -42,7 +48,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 document_id: 0, | ||||
|                 id: 0, | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -53,12 +59,12 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 2, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 document_id: 1, | ||||
|                 id: 1, | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(sum_of_typos(&doc0, &doc1), Ordering::Less); | ||||
|         assert_eq!(SumOfTypos.evaluate(&doc0, &doc1), Ordering::Less); | ||||
|     } | ||||
|  | ||||
|     // typing: "bouton manchette" | ||||
| @@ -73,7 +79,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 0, attribute: 0, attribute_index: 1, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 document_id: 0, | ||||
|                 id: 0, | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -83,12 +89,12 @@ mod tests { | ||||
|                 Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 document_id: 1, | ||||
|                 id: 1, | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(sum_of_typos(&doc0, &doc1), Ordering::Less); | ||||
|         assert_eq!(SumOfTypos.evaluate(&doc0, &doc1), Ordering::Less); | ||||
|     } | ||||
|  | ||||
|     // typing: "bouton manchztte" | ||||
| @@ -103,7 +109,7 @@ mod tests { | ||||
|                 Match { query_index: 1, distance: 1, attribute: 0, attribute_index: 1, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 document_id: 0, | ||||
|                 id: 0, | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
| @@ -113,11 +119,11 @@ mod tests { | ||||
|                 Match { query_index: 0, distance: 0, attribute: 0, attribute_index: 0, is_exact: false }, | ||||
|             ]; | ||||
|             Document { | ||||
|                 document_id: 1, | ||||
|                 id: 1, | ||||
|                 matches: matches, | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         assert_eq!(sum_of_typos(&doc0, &doc1), Ordering::Equal); | ||||
|         assert_eq!(SumOfTypos.evaluate(&doc0, &doc1), Ordering::Equal); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::cmp::Ordering; | ||||
| use group_by::GroupBy; | ||||
| use crate::Match; | ||||
| use crate::rank::{match_query_index, Document}; | ||||
| use crate::rank::criterion::Criterion; | ||||
|  | ||||
| #[inline] | ||||
| fn sum_matches_attributes(matches: &[Match]) -> u8 { | ||||
| @@ -12,10 +13,14 @@ fn sum_matches_attributes(matches: &[Match]) -> u8 { | ||||
|     }).sum() | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn sum_of_words_attribute(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     let lhs = sum_matches_attributes(&lhs.matches); | ||||
|     let rhs = sum_matches_attributes(&rhs.matches); | ||||
| /// Ranking criterion based on the `sum_matches_attributes` value of a document's matches. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct SumOfWordsAttribute; | ||||
|  | ||||
|     lhs.cmp(&rhs) | ||||
| impl Criterion for SumOfWordsAttribute { | ||||
|     // Compares the `sum_matches_attributes` value of each document's matches in | ||||
|     // ascending order: the document with the smaller sum is ordered first. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         let lhs = sum_matches_attributes(&lhs.matches); | ||||
|         let rhs = sum_matches_attributes(&rhs.matches); | ||||
|  | ||||
|         lhs.cmp(&rhs) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::cmp::Ordering; | ||||
| use group_by::GroupBy; | ||||
| use crate::Match; | ||||
| use crate::rank::{match_query_index, Document}; | ||||
| use crate::rank::criterion::Criterion; | ||||
|  | ||||
| #[inline] | ||||
| fn sum_matches_attribute_index(matches: &[Match]) -> u32 { | ||||
| @@ -12,10 +13,14 @@ fn sum_matches_attribute_index(matches: &[Match]) -> u32 { | ||||
|     }).sum() | ||||
| } | ||||
|  | ||||
| #[inline] | ||||
| pub fn sum_of_words_position(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     let lhs = sum_matches_attribute_index(&lhs.matches); | ||||
|     let rhs = sum_matches_attribute_index(&rhs.matches); | ||||
| /// Ranking criterion based on the `sum_matches_attribute_index` value of a document's matches. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct SumOfWordsPosition; | ||||
|  | ||||
|     lhs.cmp(&rhs) | ||||
| impl Criterion for SumOfWordsPosition { | ||||
|     // Compares the `sum_matches_attribute_index` value of each document's matches in | ||||
|     // ascending order: the document with the smaller sum is ordered first. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         let lhs = sum_matches_attribute_index(&lhs.matches); | ||||
|         let rhs = sum_matches_attribute_index(&rhs.matches); | ||||
|  | ||||
|         lhs.cmp(&rhs) | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -2,6 +2,7 @@ use std::cmp::{self, Ordering}; | ||||
| use group_by::GroupBy; | ||||
| use crate::Match; | ||||
| use crate::rank::{match_query_index, Document}; | ||||
| use crate::rank::criterion::Criterion; | ||||
|  | ||||
| const MAX_DISTANCE: u32 = 8; | ||||
|  | ||||
| @@ -42,10 +43,19 @@ fn matches_proximity(matches: &[Match]) -> u32 { | ||||
|     proximity | ||||
| } | ||||
|  | ||||
| pub fn words_proximity(lhs: &Document, rhs: &Document) -> Ordering { | ||||
|     matches_proximity(&lhs.matches).cmp(&matches_proximity(&rhs.matches)) | ||||
| /// Ranking criterion based on the `matches_proximity` value of a document's matches. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct WordsProximity; | ||||
|  | ||||
| impl Criterion for WordsProximity { | ||||
|     // Compares the `matches_proximity` value of each document's matches in ascending | ||||
|     // order: the document with the smaller proximity value is ordered first. | ||||
|     fn evaluate(&self, lhs: &Document, rhs: &Document) -> Ordering { | ||||
|         let lhs = matches_proximity(&lhs.matches); | ||||
|         let rhs = matches_proximity(&rhs.matches); | ||||
|  | ||||
|         lhs.cmp(&rhs) | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| #[cfg(test)] | ||||
| mod tests { | ||||
|     use super::*; | ||||
|   | ||||
| @@ -3,7 +3,7 @@ mod ranked_stream; | ||||
|  | ||||
| use crate::{Match, DocumentId}; | ||||
|  | ||||
| pub use self::ranked_stream::{RankedStream, Config}; | ||||
| pub use self::ranked_stream::{RankedStreamBuilder, RankedStream}; | ||||
|  | ||||
| #[inline] | ||||
| fn match_query_index(a: &Match, b: &Match) -> bool { | ||||
| @@ -18,10 +18,10 @@ pub struct Document { | ||||
|  | ||||
| impl Document { | ||||
|     pub fn new(doc: DocumentId, match_: Match) -> Self { | ||||
|         Self::from_sorted_matches(doc, vec![match_]) | ||||
|         unsafe { Self::from_sorted_matches(doc, vec![match_]) } | ||||
|     } | ||||
|  | ||||
|     pub fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self { | ||||
|     pub unsafe fn from_sorted_matches(id: DocumentId, matches: Vec<Match>) -> Self { | ||||
|         Self { id, matches } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -1,4 +1,3 @@ | ||||
| use std::cmp::Ordering; | ||||
| use std::rc::Rc; | ||||
| use std::{mem, vec}; | ||||
|  | ||||
| @@ -8,134 +7,97 @@ use group_by::GroupByMut; | ||||
|  | ||||
| use crate::automaton::{DfaExt, AutomatonExt}; | ||||
| use crate::metadata::Metadata; | ||||
| use crate::metadata::ops::{OpBuilder, Union}; | ||||
| use crate::rank::criterion::Criteria; | ||||
| use crate::metadata::ops::OpBuilder; | ||||
| use crate::rank::criterion::Criterion; | ||||
| use crate::rank::Document; | ||||
| use crate::{Match, DocumentId}; | ||||
| use crate::Match; | ||||
|  | ||||
| pub struct Config<'m, F> { | ||||
|     pub criteria: Criteria<F>, | ||||
|     pub metadata: &'m Metadata, | ||||
|     pub automatons: Vec<DfaExt>, | ||||
|     pub limit: usize, | ||||
| /// Owns the inputs from which `build` creates a `RankedStream`. | ||||
| /// | ||||
| /// The stream produced by `build` borrows `automatons` and `criteria` from this builder. | ||||
| #[derive(Clone)] | ||||
| pub struct RankedStreamBuilder<'m, C> { | ||||
|     // Metadata pushed into the `OpBuilder` when building the stream. | ||||
|     metadata: &'m Metadata, | ||||
|     // Automatons wrapped in `Rc`; `build` clones this list for the `OpBuilder`. | ||||
|     automatons: Vec<Rc<DfaExt>>, | ||||
|     // Ranking criteria; empty after `new` until replaced through `criteria`. | ||||
|     criteria: Vec<C>, | ||||
| } | ||||
|  | ||||
| pub struct RankedStream<'m, F>(RankedStreamInner<'m, F>); | ||||
|  | ||||
| impl<'m, F> RankedStream<'m, F> { | ||||
|     pub fn new(config: Config<'m, F>) -> Self { | ||||
|         let automatons: Vec<_> = config.automatons.into_iter().map(Rc::new).collect(); | ||||
|         let mut builder = OpBuilder::with_automatons(automatons.clone()); | ||||
|         builder.push(config.metadata); | ||||
|  | ||||
|         let inner = RankedStreamInner::Fed { | ||||
|             inner: builder.union(), | ||||
|             automatons: automatons, | ||||
|             criteria: config.criteria, | ||||
|             limit: config.limit, | ||||
|             matches: FnvHashMap::default(), | ||||
|         }; | ||||
|  | ||||
|         RankedStream(inner) | ||||
| impl<'m, C> RankedStreamBuilder<'m, C> { | ||||
|     pub fn new(metadata: &'m Metadata, automatons: Vec<DfaExt>) -> Self { | ||||
|         RankedStreamBuilder { | ||||
|             metadata: metadata, | ||||
|             automatons: automatons.into_iter().map(Rc::new).collect(), | ||||
|             criteria: Vec::new(), // hummm...  prefer the criterion::default() ones ! | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl<'m, 'a, F> fst::Streamer<'a> for RankedStream<'m, F> | ||||
| where F: Fn(&Document, &Document) -> Ordering + Copy, | ||||
| { | ||||
|     type Item = Document; | ||||
|  | ||||
|     fn next(&'a mut self) -> Option<Self::Item> { | ||||
|         self.0.next() | ||||
|     pub fn criteria(&mut self, criteria: Vec<C>) { | ||||
|         self.criteria = criteria; | ||||
|     } | ||||
| } | ||||
|  | ||||
| enum RankedStreamInner<'m, F> { | ||||
|     Fed { | ||||
|         inner: Union<'m>, | ||||
|         automatons: Vec<Rc<DfaExt>>, | ||||
|         criteria: Criteria<F>, | ||||
|         limit: usize, | ||||
|         matches: FnvHashMap<DocumentId, Vec<Match>>, | ||||
|     }, | ||||
|     Pours { | ||||
|         inner: vec::IntoIter<Document>, | ||||
|     }, | ||||
| } | ||||
|     pub fn build(&self) -> RankedStream<C> { | ||||
|         let mut builder = OpBuilder::with_automatons(self.automatons.clone()); | ||||
|         builder.push(self.metadata); | ||||
|  | ||||
| impl<'m, 'a, F> fst::Streamer<'a> for RankedStreamInner<'m, F> | ||||
| where F: Fn(&Document, &Document) -> Ordering + Copy, | ||||
| { | ||||
|     type Item = Document; | ||||
|  | ||||
|     fn next(&'a mut self) -> Option<Self::Item> { | ||||
|         loop { | ||||
|             match self { | ||||
|                 RankedStreamInner::Fed { inner, automatons, criteria, limit, matches } => { | ||||
|                     match inner.next() { | ||||
|                         Some((string, indexed_values)) => { | ||||
|                             for iv in indexed_values { | ||||
|                                 let automaton = &automatons[iv.index]; | ||||
|                                 let distance = automaton.eval(string).to_u8(); | ||||
|                                 let same_length = string.len() == automaton.query_len(); | ||||
|  | ||||
|                                 for di in iv.doc_indexes.as_slice() { | ||||
|                                     let match_ = Match { | ||||
|                                         query_index: iv.index as u32, | ||||
|                                         distance: distance, | ||||
|                                         attribute: di.attribute, | ||||
|                                         attribute_index: di.attribute_index, | ||||
|                                         is_exact: distance == 0 && same_length, | ||||
|                                     }; | ||||
|                                     matches.entry(di.document) | ||||
|                                            .or_insert_with(Vec::new) | ||||
|                                            .push(match_); | ||||
|                                 } | ||||
|                             } | ||||
|                         }, | ||||
|                         None => { | ||||
|                             let matches = mem::replace(matches, FnvHashMap::default()); | ||||
|                             let criteria = mem::replace(criteria, Criteria::new()); | ||||
|                             *self = RankedStreamInner::Pours { | ||||
|                                 inner: matches_into_iter(matches, criteria, *limit).into_iter() | ||||
|                             }; | ||||
|                         }, | ||||
|                     } | ||||
|                 }, | ||||
|                 RankedStreamInner::Pours { inner } => { | ||||
|                     return inner.next() | ||||
|                 }, | ||||
|             } | ||||
|         RankedStream { | ||||
|             stream: builder.union(), | ||||
|             automatons: &self.automatons, | ||||
|             criteria: &self.criteria, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| fn matches_into_iter<F>(matches: FnvHashMap<DocumentId, Vec<Match>>, | ||||
|                         criteria: Criteria<F>, | ||||
|                         limit: usize) -> vec::IntoIter<Document> | ||||
| where F: Fn(&Document, &Document) -> Ordering + Copy, | ||||
| { | ||||
|     let mut documents: Vec<_> = matches.into_iter().map(|(id, mut matches)| { | ||||
|         matches.sort_unstable(); | ||||
|         Document::from_sorted_matches(id, matches) | ||||
|     }).collect(); | ||||
| /// Stream of matches that `retrieve_documents` drains and turns into ranked documents. | ||||
| pub struct RankedStream<'a, 'm, C> { | ||||
|     // Union operation created by `RankedStreamBuilder::build` from the metadata. | ||||
|     stream: crate::metadata::ops::Union<'m>, | ||||
|     // Automatons borrowed from the builder, indexed by the `index` of each yielded value. | ||||
|     automatons: &'a [Rc<DfaExt>], | ||||
|     // Criteria borrowed from the builder, applied one after the other while sorting. | ||||
|     criteria: &'a [C], | ||||
| } | ||||
|  | ||||
|     let mut groups = vec![documents.as_mut_slice()]; | ||||
| impl<'a, 'm, C> RankedStream<'a, 'm, C> { | ||||
|     pub fn retrieve_documents(&mut self, limit: usize) -> Vec<Document> | ||||
|     where C: Criterion | ||||
|     { | ||||
|         let mut matches = FnvHashMap::default(); | ||||
|  | ||||
|     for sort in criteria { | ||||
|         let temp = mem::replace(&mut groups, Vec::new()); | ||||
|         let mut computed = 0; | ||||
|         while let Some((string, indexed_values)) = self.stream.next() { | ||||
|             for iv in indexed_values { | ||||
|                 let automaton = &self.automatons[iv.index]; | ||||
|                 let distance = automaton.eval(string).to_u8(); | ||||
|                 let is_exact = distance == 0 && string.len() == automaton.query_len(); | ||||
|  | ||||
|         'grp: for group in temp { | ||||
|             group.sort_unstable_by(sort); | ||||
|             for group in GroupByMut::new(group, |a, b| sort(a, b) == Ordering::Equal) { | ||||
|                 computed += group.len(); | ||||
|                 groups.push(group); | ||||
|                 if computed >= limit { break 'grp } | ||||
|                 for di in iv.doc_indexes.as_slice() { | ||||
|                     let match_ = Match { | ||||
|                         query_index: iv.index as u32, | ||||
|                         distance: distance, | ||||
|                         attribute: di.attribute, | ||||
|                         attribute_index: di.attribute_index, | ||||
|                         is_exact: is_exact, | ||||
|                     }; | ||||
|                     matches.entry(di.document).or_insert_with(Vec::new).push(match_); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     documents.truncate(limit); | ||||
|     documents.into_iter() | ||||
|         // collect matches from an HashMap into a Vec | ||||
|         let mut documents: Vec<_> = matches.into_iter().map(|(id, mut matches)| { | ||||
|             matches.sort_unstable(); | ||||
|             unsafe { Document::from_sorted_matches(id, matches) } | ||||
|         }).collect(); | ||||
|  | ||||
|         let mut groups = vec![documents.as_mut_slice()]; | ||||
|  | ||||
|         for criterion in self.criteria { | ||||
|             let temp = mem::replace(&mut groups, Vec::new()); | ||||
|             let mut computed = 0; | ||||
|  | ||||
|             'grp: for group in temp { | ||||
|                 group.sort_unstable_by(|a, b| criterion.evaluate(a, b)); | ||||
|                 for group in GroupByMut::new(group, |a, b| criterion.eq(a, b)) { | ||||
|                     computed += group.len(); | ||||
|                     groups.push(group); | ||||
|                     if computed >= limit { break 'grp } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         documents.truncate(limit); | ||||
|         documents | ||||
|     } | ||||
| } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user