mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge pull request #188 from meilisearch/exactness-criterion
Exactness criterion
This commit is contained in:
		| @@ -65,6 +65,16 @@ impl FieldsIdsMap { | |||||||
|     pub fn iter(&self) -> impl Iterator<Item=(FieldId, &str)> { |     pub fn iter(&self) -> impl Iterator<Item=(FieldId, &str)> { | ||||||
|         self.ids_names.iter().map(|(id, name)| (*id, name.as_str())) |         self.ids_names.iter().map(|(id, name)| (*id, name.as_str())) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Iterate over the ids in the order of the ids. | ||||||
|  |     pub fn ids<'a>(&'a self) -> impl Iterator<Item=FieldId> + 'a { | ||||||
|  |         self.ids_names.keys().copied() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Iterate over the names in the order of the ids. | ||||||
|  |     pub fn names(&self) -> impl Iterator<Item=&str> { | ||||||
|  |         self.ids_names.values().map(AsRef::as_ref) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl Default for FieldsIdsMap { | impl Default for FieldsIdsMap { | ||||||
|   | |||||||
							
								
								
									
										338
									
								
								milli/src/search/criteria/exactness.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										338
									
								
								milli/src/search/criteria/exactness.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,338 @@ | |||||||
|  | use std::{collections::HashMap, mem}; | ||||||
|  |  | ||||||
|  | use log::debug; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use itertools::Itertools; | ||||||
|  | use std::ops::BitOr; | ||||||
|  |  | ||||||
|  | use crate::search::query_tree::{Operation, PrimitiveQueryPart}; | ||||||
|  | use crate::search::criteria::{ | ||||||
|  |     Context, | ||||||
|  |     Criterion, | ||||||
|  |     CriterionParameters, | ||||||
|  |     CriterionResult, | ||||||
|  |     resolve_query_tree, | ||||||
|  | }; | ||||||
|  | use crate::TreeLevel; | ||||||
|  |  | ||||||
|  | pub struct Exactness<'t> { | ||||||
|  |     ctx: &'t dyn Context<'t>, | ||||||
|  |     query_tree: Option<Operation>, | ||||||
|  |     state: Option<State>, | ||||||
|  |     bucket_candidates: RoaringBitmap, | ||||||
|  |     parent: Box<dyn Criterion + 't>, | ||||||
|  |     query: Vec<ExactQueryPart>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Exactness<'t> { | ||||||
|  |     pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>, primitive_query: &[PrimitiveQueryPart]) -> heed::Result<Self> { | ||||||
|  |         let mut query: Vec<_> = Vec::with_capacity(primitive_query.len()); | ||||||
|  |         for part in primitive_query { | ||||||
|  |             query.push(ExactQueryPart::from_primitive_query_part(ctx, part)?); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(Exactness { | ||||||
|  |             ctx, | ||||||
|  |             query_tree: None, | ||||||
|  |             state: None, | ||||||
|  |             bucket_candidates: RoaringBitmap::new(), | ||||||
|  |             parent, | ||||||
|  |             query, | ||||||
|  |         }) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'t> Criterion for Exactness<'t> { | ||||||
|  |     #[logging_timer::time("Exactness::{}")] | ||||||
|  |     fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { | ||||||
|  |         // remove excluded candidates when next is called, instead of doing it in the loop. | ||||||
|  |         if let Some(state) = self.state.as_mut() { | ||||||
|  |             state.difference_with(params.excluded_candidates); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         loop { | ||||||
|  |             debug!("Exactness at state {:?}", self.state); | ||||||
|  |  | ||||||
|  |             match self.state.as_mut() { | ||||||
|  |                 Some(state) if state.is_empty() => { | ||||||
|  |                     // reset state | ||||||
|  |                     self.state = None; | ||||||
|  |                     self.query_tree = None; | ||||||
|  |                 }, | ||||||
|  |                 Some(state) => { | ||||||
|  |                     let (candidates, state) = resolve_state(self.ctx, mem::take(state), &self.query)?; | ||||||
|  |                     self.state = state; | ||||||
|  |  | ||||||
|  |                     return Ok(Some(CriterionResult { | ||||||
|  |                         query_tree: self.query_tree.clone(), | ||||||
|  |                         candidates: Some(candidates), | ||||||
|  |                         bucket_candidates: mem::take(&mut self.bucket_candidates), | ||||||
|  |                     })); | ||||||
|  |                 }, | ||||||
|  |                 None => { | ||||||
|  |                     match self.parent.next(params)? { | ||||||
|  |                         Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { | ||||||
|  |                             let candidates = match candidates { | ||||||
|  |                                 Some(candidates) => candidates, | ||||||
|  |                                 None => resolve_query_tree(self.ctx, &query_tree, &mut HashMap::new(), params.wdcache)?, | ||||||
|  |                             }; | ||||||
|  |                             self.state = Some(State::new(candidates)); | ||||||
|  |                             self.query_tree = Some(query_tree); | ||||||
|  |                             self.bucket_candidates |= bucket_candidates; | ||||||
|  |                         }, | ||||||
|  |                         Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { | ||||||
|  |                             return Ok(Some(CriterionResult { | ||||||
|  |                                 query_tree, | ||||||
|  |                                 candidates, | ||||||
|  |                                 bucket_candidates, | ||||||
|  |                             })); | ||||||
|  |                         }, | ||||||
|  |                         None => return Ok(None), | ||||||
|  |                     } | ||||||
|  |                 }, | ||||||
|  |             } | ||||||
|  |          } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug)] | ||||||
|  | enum State { | ||||||
|  |     /// Extract the documents that have an attribute that contains exactly the query. | ||||||
|  |     ExactAttribute(RoaringBitmap), | ||||||
|  |     /// Extract the documents that have an attribute that starts with exactly the query. | ||||||
|  |     AttributeStartsWith(RoaringBitmap), | ||||||
|  |     /// Rank the remaining documents by the number of exact words contained. | ||||||
|  |     ExactWords(RoaringBitmap), | ||||||
|  |     Remainings(Vec<RoaringBitmap>), | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl State { | ||||||
|  |     fn new(candidates: RoaringBitmap) -> Self { | ||||||
|  |         Self::ExactAttribute(candidates) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn difference_with(&mut self, lhs: &RoaringBitmap) { | ||||||
|  |         match self { | ||||||
|  |             Self::ExactAttribute(candidates) | | ||||||
|  |             Self::AttributeStartsWith(candidates) | | ||||||
|  |             Self::ExactWords(candidates) => *candidates -= lhs, | ||||||
|  |             Self::Remainings(candidates_array) => { | ||||||
|  |                 candidates_array.iter_mut().for_each(|candidates| *candidates -= lhs); | ||||||
|  |                 candidates_array.retain(|candidates| !candidates.is_empty()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn is_empty(&self) -> bool { | ||||||
|  |         match self { | ||||||
|  |             Self::ExactAttribute(candidates) | | ||||||
|  |             Self::AttributeStartsWith(candidates) | | ||||||
|  |             Self::ExactWords(candidates) => candidates.is_empty(), | ||||||
|  |             Self::Remainings(candidates_array) => { | ||||||
|  |                 candidates_array.iter().all(RoaringBitmap::is_empty) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Default for State { | ||||||
|  |     fn default() -> Self { | ||||||
|  |         Self::Remainings(vec![]) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[logging_timer::time("Exactness::{}")] | ||||||
|  | fn resolve_state( | ||||||
|  |     ctx: &dyn Context, | ||||||
|  |     state: State, | ||||||
|  |     query: &[ExactQueryPart], | ||||||
|  | ) -> anyhow::Result<(RoaringBitmap, Option<State>)> | ||||||
|  | { | ||||||
|  |     use State::*; | ||||||
|  |     match state { | ||||||
|  |         ExactAttribute(mut allowed_candidates) => { | ||||||
|  |             let query_len = query.len() as u32; | ||||||
|  |             let mut candidates = RoaringBitmap::new(); | ||||||
|  |             let attributes_ids = ctx.searchable_fields_ids()?; | ||||||
|  |             for id in attributes_ids { | ||||||
|  |                 if let Some(attribute_allowed_docids) = ctx.field_id_len_docids(id, query_len)? { | ||||||
|  |                     let mut attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?; | ||||||
|  |                     attribute_candidates_array.push(attribute_allowed_docids); | ||||||
|  |                     candidates |= intersection_of(attribute_candidates_array.iter().collect()); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // only keep allowed candidates | ||||||
|  |             candidates &= &allowed_candidates; | ||||||
|  |             // remove current candidates from allowed candidates | ||||||
|  |             allowed_candidates -= &candidates; | ||||||
|  |             Ok((candidates, Some(AttributeStartsWith(allowed_candidates)))) | ||||||
|  |  | ||||||
|  |         }, | ||||||
|  |         AttributeStartsWith(mut allowed_candidates) => { | ||||||
|  |             let mut candidates = RoaringBitmap::new(); | ||||||
|  |             let attributes_ids = ctx.searchable_fields_ids()?; | ||||||
|  |             for id in attributes_ids { | ||||||
|  |                 let attribute_candidates_array = attribute_start_with_docids(ctx, id as u32, query)?; | ||||||
|  |                 candidates |= intersection_of(attribute_candidates_array.iter().collect()); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // only keep allowed candidates | ||||||
|  |             candidates &= &allowed_candidates; | ||||||
|  |             // remove current candidates from allowed candidates | ||||||
|  |             allowed_candidates -= &candidates; | ||||||
|  |             Ok((candidates, Some(ExactWords(allowed_candidates)))) | ||||||
|  |         }, | ||||||
|  |         ExactWords(mut allowed_candidates) => { | ||||||
|  |             let number_of_part = query.len(); | ||||||
|  |             let mut parts_candidates_array = Vec::with_capacity(number_of_part); | ||||||
|  |  | ||||||
|  |             for part in query { | ||||||
|  |                 let mut candidates = RoaringBitmap::new(); | ||||||
|  |                 use ExactQueryPart::*; | ||||||
|  |                 match part { | ||||||
|  |                     Synonyms(synonyms) => { | ||||||
|  |                         for synonym in synonyms { | ||||||
|  |                             if let Some(synonym_candidates) = ctx.word_docids(synonym)? { | ||||||
|  |                                 candidates |= synonym_candidates; | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                     }, | ||||||
|  |                     // compute intersection on pair of words with a proximity of 0. | ||||||
|  |                     Phrase(phrase) => { | ||||||
|  |                         let mut bitmaps = Vec::with_capacity(phrase.len().saturating_sub(1)); | ||||||
|  |                         for words in phrase.windows(2) { | ||||||
|  |                             if let [left, right] = words { | ||||||
|  |                                 match ctx.word_pair_proximity_docids(left, right, 0)? { | ||||||
|  |                                     Some(docids) => bitmaps.push(docids), | ||||||
|  |                                     None => { | ||||||
|  |                                         bitmaps.clear(); | ||||||
|  |                                         break | ||||||
|  |                                     }, | ||||||
|  |                                 } | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                         candidates |= intersection_of(bitmaps.iter().collect()); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 parts_candidates_array.push(candidates); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             let mut candidates_array = Vec::new(); | ||||||
|  |  | ||||||
|  |             // compute documents that contain all exact words. | ||||||
|  |             let mut all_exact_candidates = intersection_of(parts_candidates_array.iter().collect()); | ||||||
|  |             all_exact_candidates &= &allowed_candidates; | ||||||
|  |             allowed_candidates -= &all_exact_candidates; | ||||||
|  |  | ||||||
|  |             // push the result of combinations of exact words grouped by the number of exact words contained by documents. | ||||||
|  |             for c_count in (1..number_of_part).rev() { | ||||||
|  |                 let mut combinations_candidates = parts_candidates_array | ||||||
|  |                     .iter() | ||||||
|  |                     // create all `c_count` combinations of exact words | ||||||
|  |                     .combinations(c_count) | ||||||
|  |                     // intersect each word candidates in combinations | ||||||
|  |                     .map(intersection_of) | ||||||
|  |                     // union combinations of `c_count` exact words | ||||||
|  |                     .fold(RoaringBitmap::new(),  RoaringBitmap::bitor); | ||||||
|  |                 // only keep allowed candidates | ||||||
|  |                 combinations_candidates &= &allowed_candidates; | ||||||
|  |                 // remove current candidates from allowed candidates | ||||||
|  |                 allowed_candidates -= &combinations_candidates; | ||||||
|  |                 candidates_array.push(combinations_candidates); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             // push remainings allowed candidates as the worst valid candidates | ||||||
|  |             candidates_array.push(allowed_candidates); | ||||||
|  |             // reverse the array to be able to pop candidates from the best to the worst. | ||||||
|  |             candidates_array.reverse(); | ||||||
|  |  | ||||||
|  |             Ok((all_exact_candidates, Some(Remainings(candidates_array)))) | ||||||
|  |         }, | ||||||
|  |         // pop remainings candidates until the emptiness | ||||||
|  |         Remainings(mut candidates_array) => { | ||||||
|  |             let candidates = candidates_array.pop().unwrap_or_default(); | ||||||
|  |             if !candidates_array.is_empty() { | ||||||
|  |                 Ok((candidates, Some(Remainings(candidates_array)))) | ||||||
|  |             } else { | ||||||
|  |                 Ok((candidates, None)) | ||||||
|  |             } | ||||||
|  |         }, | ||||||
|  |  | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn attribute_start_with_docids(ctx: &dyn Context, attribute_id: u32, query: &[ExactQueryPart]) -> heed::Result<Vec<RoaringBitmap>> { | ||||||
|  |     let lowest_level = TreeLevel::min_value(); | ||||||
|  |     let mut attribute_candidates_array = Vec::new(); | ||||||
|  |     // start from attribute first position | ||||||
|  |     let mut pos = attribute_id * 1000; | ||||||
|  |     for part in query { | ||||||
|  |         use ExactQueryPart::*; | ||||||
|  |         match part { | ||||||
|  |             Synonyms(synonyms) => { | ||||||
|  |                 let mut synonyms_candidates = RoaringBitmap::new(); | ||||||
|  |                 for word in synonyms { | ||||||
|  |                     let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?; | ||||||
|  |                     if let Some(word_candidates) = wc { | ||||||
|  |                         synonyms_candidates |= word_candidates; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 attribute_candidates_array.push(synonyms_candidates); | ||||||
|  |                 pos += 1; | ||||||
|  |             }, | ||||||
|  |             Phrase(phrase) => { | ||||||
|  |                 for word in phrase { | ||||||
|  |                     let wc = ctx.word_level_position_docids(word, lowest_level, pos, pos)?; | ||||||
|  |                     if let Some(word_candidates) = wc { | ||||||
|  |                         attribute_candidates_array.push(word_candidates); | ||||||
|  |                     } | ||||||
|  |                     pos += 1; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     Ok(attribute_candidates_array) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | fn intersection_of(mut rbs: Vec<&RoaringBitmap>) -> RoaringBitmap { | ||||||
|  |     rbs.sort_unstable_by_key(|rb| rb.len()); | ||||||
|  |     let mut iter = rbs.into_iter(); | ||||||
|  |     match iter.next() { | ||||||
|  |         Some(first) => iter.fold(first.clone(), |acc, rb| acc & rb), | ||||||
|  |         None => RoaringBitmap::new(), | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
/// A part of the user query, in the form used by the exactness criterion.
#[derive(Debug, Clone)]
pub enum ExactQueryPart {
    /// The words of a phrase, in order.
    Phrase(Vec<String>),
    /// A query word along with its single-word synonyms.
    Synonyms(Vec<String>),
}
|  |  | ||||||
|  | impl ExactQueryPart { | ||||||
|  |     fn from_primitive_query_part(ctx: &dyn Context, part: &PrimitiveQueryPart) -> heed::Result<Self> { | ||||||
|  |         let part = match part { | ||||||
|  |             PrimitiveQueryPart::Word(word, _) => { | ||||||
|  |                 match ctx.synonyms(word)? { | ||||||
|  |                     Some(synonyms) => { | ||||||
|  |                         let mut synonyms: Vec<_> = synonyms.into_iter().filter_map(|mut array| { | ||||||
|  |                             // keep 1 word synonyms only. | ||||||
|  |                             match array.pop() { | ||||||
|  |                                 Some(word) if array.is_empty() => Some(word), | ||||||
|  |                                 _ => None, | ||||||
|  |                             } | ||||||
|  |                         }).collect(); | ||||||
|  |                         synonyms.push(word.clone()); | ||||||
|  |                         ExactQueryPart::Synonyms(synonyms) | ||||||
|  |                     }, | ||||||
|  |                     None => ExactQueryPart::Synonyms(vec![word.clone()]), | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             PrimitiveQueryPart::Phrase(phrase) => ExactQueryPart::Phrase(phrase.clone()), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         Ok(part) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -4,12 +4,13 @@ use std::borrow::Cow; | |||||||
| use anyhow::bail; | use anyhow::bail; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::{TreeLevel, search::{word_derivations, WordDerivationsCache}}; | use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}}; | ||||||
| use crate::{Index, DocumentId}; | use crate::{Index, DocumentId}; | ||||||
|  |  | ||||||
| use super::query_tree::{Operation, Query, QueryKind}; | use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; | ||||||
| use self::asc_desc::AscDesc; | use self::asc_desc::AscDesc; | ||||||
| use self::attribute::Attribute; | use self::attribute::Attribute; | ||||||
|  | use self::exactness::Exactness; | ||||||
| use self::r#final::Final; | use self::r#final::Final; | ||||||
| use self::initial::Initial; | use self::initial::Initial; | ||||||
| use self::proximity::Proximity; | use self::proximity::Proximity; | ||||||
| @@ -18,6 +19,7 @@ use self::words::Words; | |||||||
|  |  | ||||||
| mod asc_desc; | mod asc_desc; | ||||||
| mod attribute; | mod attribute; | ||||||
|  | mod exactness; | ||||||
| mod initial; | mod initial; | ||||||
| mod proximity; | mod proximity; | ||||||
| mod typo; | mod typo; | ||||||
| @@ -81,6 +83,10 @@ pub trait Context<'c> { | |||||||
|     fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>>; |     fn docid_words_positions(&self, docid: DocumentId) -> heed::Result<HashMap<String, RoaringBitmap>>; | ||||||
|     fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>; |     fn word_position_iterator(&self, word: &str, level: TreeLevel, in_prefix_cache: bool, left: Option<u32>, right: Option<u32>) -> heed::Result<Box<dyn Iterator<Item =heed::Result<((&'c str, TreeLevel, u32, u32), RoaringBitmap)>> + 'c>>; | ||||||
|     fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>; |     fn word_position_last_level(&self, word: &str, in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>>; | ||||||
|  |     fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>>; | ||||||
|  |     fn searchable_fields_ids(&self) ->  heed::Result<Vec<FieldId>>; | ||||||
|  |     fn field_id_len_docids(&self, field_id: FieldId, len: u32) -> heed::Result<Option<RoaringBitmap>>; | ||||||
|  |     fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error>; | ||||||
| } | } | ||||||
| pub struct CriteriaBuilder<'t> { | pub struct CriteriaBuilder<'t> { | ||||||
|     rtxn: &'t heed::RoTxn<'t>, |     rtxn: &'t heed::RoTxn<'t>, | ||||||
| @@ -170,6 +176,26 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> { | |||||||
|  |  | ||||||
|         Ok(last_level) |         Ok(last_level) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     fn synonyms(&self, word: &str) -> heed::Result<Option<Vec<Vec<String>>>> { | ||||||
|  |         self.index.words_synonyms(self.rtxn, &[word]) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn searchable_fields_ids(&self) -> heed::Result<Vec<FieldId>> { | ||||||
|  |         match self.index.searchable_fields_ids(self.rtxn)? { | ||||||
|  |             Some(searchable_fields_ids) => Ok(searchable_fields_ids), | ||||||
|  |             None => Ok(self.index.fields_ids_map(self.rtxn)?.ids().collect()), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn field_id_len_docids(&self, _field_id: FieldId, _len: u32) -> heed::Result<Option<RoaringBitmap>> { | ||||||
|  |         Ok(None) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn word_level_position_docids(&self, word: &str, level: TreeLevel, left: u32, right: u32) -> Result<Option<RoaringBitmap>, heed::Error> { | ||||||
|  |         let key = (word, level, left, right); | ||||||
|  |         self.index.word_level_position_docids.get(self.rtxn, &key) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'t> CriteriaBuilder<'t> { | impl<'t> CriteriaBuilder<'t> { | ||||||
| @@ -182,11 +208,14 @@ impl<'t> CriteriaBuilder<'t> { | |||||||
|     pub fn build( |     pub fn build( | ||||||
|         &'t self, |         &'t self, | ||||||
|         query_tree: Option<Operation>, |         query_tree: Option<Operation>, | ||||||
|  |         primitive_query: Option<Vec<PrimitiveQueryPart>>, | ||||||
|         facet_candidates: Option<RoaringBitmap>, |         facet_candidates: Option<RoaringBitmap>, | ||||||
|     ) -> anyhow::Result<Final<'t>> |     ) -> anyhow::Result<Final<'t>> | ||||||
|     { |     { | ||||||
|         use crate::criterion::Criterion as Name; |         use crate::criterion::Criterion as Name; | ||||||
|  |  | ||||||
|  |         let primitive_query = primitive_query.unwrap_or_default(); | ||||||
|  |  | ||||||
|         let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>; |         let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>; | ||||||
|         for name in self.index.criteria(&self.rtxn)? { |         for name in self.index.criteria(&self.rtxn)? { | ||||||
|             criterion = match name { |             criterion = match name { | ||||||
| @@ -194,9 +223,9 @@ impl<'t> CriteriaBuilder<'t> { | |||||||
|                 Name::Words => Box::new(Words::new(self, criterion)), |                 Name::Words => Box::new(Words::new(self, criterion)), | ||||||
|                 Name::Proximity => Box::new(Proximity::new(self, criterion)), |                 Name::Proximity => Box::new(Proximity::new(self, criterion)), | ||||||
|                 Name::Attribute => Box::new(Attribute::new(self, criterion)), |                 Name::Attribute => Box::new(Attribute::new(self, criterion)), | ||||||
|  |                 Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?), | ||||||
|                 Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?), |                 Name::Asc(field) => Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?), | ||||||
|                 Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?), |                 Name::Desc(field) => Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?), | ||||||
|                 _otherwise => criterion, |  | ||||||
|             }; |             }; | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -455,6 +484,22 @@ pub mod test { | |||||||
|         fn word_position_last_level(&self, _word: &str, _in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> { |         fn word_position_last_level(&self, _word: &str, _in_prefix_cache: bool) -> heed::Result<Option<TreeLevel>> { | ||||||
|             todo!() |             todo!() | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         fn synonyms(&self, _word: &str) -> heed::Result<Option<Vec<Vec<String>>>> { | ||||||
|  |             todo!() | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn searchable_fields_ids(&self) ->  heed::Result<Vec<FieldId>> { | ||||||
|  |             todo!() | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn word_level_position_docids(&self, _word: &str, _level: TreeLevel, _left: u32, _right: u32) -> Result<Option<RoaringBitmap>, heed::Error> { | ||||||
|  |             todo!() | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         fn field_id_len_docids(&self, _field_id: FieldId, _len: u32) -> heed::Result<Option<RoaringBitmap>> { | ||||||
|  |             todo!() | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     impl<'a> Default for TestContext<'a> { |     impl<'a> Default for TestContext<'a> { | ||||||
|   | |||||||
| @@ -97,7 +97,7 @@ impl<'a> Search<'a> { | |||||||
|     pub fn execute(&self) -> anyhow::Result<SearchResult> { |     pub fn execute(&self) -> anyhow::Result<SearchResult> { | ||||||
|         // We create the query tree by splitting the query into tokens. |         // We create the query tree by splitting the query into tokens. | ||||||
|         let before = Instant::now(); |         let before = Instant::now(); | ||||||
|         let query_tree = match self.query.as_ref() { |         let (query_tree, primitive_query) = match self.query.as_ref() { | ||||||
|             Some(query) => { |             Some(query) => { | ||||||
|                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); |                 let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); | ||||||
|                 builder.optional_words(self.optional_words); |                 builder.optional_words(self.optional_words); | ||||||
| @@ -113,9 +113,9 @@ impl<'a> Search<'a> { | |||||||
|                 let analyzer = Analyzer::new(config); |                 let analyzer = Analyzer::new(config); | ||||||
|                 let result = analyzer.analyze(query); |                 let result = analyzer.analyze(query); | ||||||
|                 let tokens = result.tokens(); |                 let tokens = result.tokens(); | ||||||
|                 builder.build(tokens)? |                 builder.build(tokens)?.map_or((None, None), |(qt, pq)| (Some(qt), Some(pq))) | ||||||
|             }, |             }, | ||||||
|             None => None, |             None => (None, None), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         debug!("query tree: {:?} took {:.02?}", query_tree, before.elapsed()); |         debug!("query tree: {:?} took {:.02?}", query_tree, before.elapsed()); | ||||||
| @@ -135,7 +135,7 @@ impl<'a> Search<'a> { | |||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; |         let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; | ||||||
|         let criteria = criteria_builder.build(query_tree, facet_candidates)?; |         let criteria = criteria_builder.build(query_tree, primitive_query, facet_candidates)?; | ||||||
|  |  | ||||||
|         match self.index.distinct_attribute(self.rtxn)? { |         match self.index.distinct_attribute(self.rtxn)? { | ||||||
|             None => self.perform_sort(NoopDistinct, matching_words, criteria), |             None => self.perform_sort(NoopDistinct, matching_words, criteria), | ||||||
|   | |||||||
| @@ -228,11 +228,12 @@ impl<'a> QueryTreeBuilder<'a> { | |||||||
|     /// - if `authorize_typos` is set to `false` the query tree will be generated |     /// - if `authorize_typos` is set to `false` the query tree will be generated | ||||||
|     ///   forcing all query words to match documents without any typo |     ///   forcing all query words to match documents without any typo | ||||||
|     ///   (the criterion `typo` will be ignored) |     ///   (the criterion `typo` will be ignored) | ||||||
|     pub fn build(&self, query: TokenStream) -> anyhow::Result<Option<Operation>> { |     pub fn build(&self, query: TokenStream) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>> { | ||||||
|         let stop_words = self.index.stop_words(self.rtxn)?; |         let stop_words = self.index.stop_words(self.rtxn)?; | ||||||
|         let primitive_query = create_primitive_query(query, stop_words, self.words_limit); |         let primitive_query = create_primitive_query(query, stop_words, self.words_limit); | ||||||
|         if !primitive_query.is_empty() { |         if !primitive_query.is_empty() { | ||||||
|             create_query_tree(self, self.optional_words, self.authorize_typos, primitive_query).map(Some) |             let qt = create_query_tree(self, self.optional_words, self.authorize_typos, &primitive_query)?; | ||||||
|  |             Ok(Some((qt, primitive_query))) | ||||||
|         } else { |         } else { | ||||||
|             Ok(None) |             Ok(None) | ||||||
|         } |         } | ||||||
| @@ -340,7 +341,7 @@ fn create_query_tree( | |||||||
|     ctx: &impl Context, |     ctx: &impl Context, | ||||||
|     optional_words: bool, |     optional_words: bool, | ||||||
|     authorize_typos: bool, |     authorize_typos: bool, | ||||||
|     query: PrimitiveQuery, |     query: &[PrimitiveQueryPart], | ||||||
| ) -> anyhow::Result<Operation> | ) -> anyhow::Result<Operation> | ||||||
| { | { | ||||||
|     /// Matches on the `PrimitiveQueryPart` and create an operation from it. |     /// Matches on the `PrimitiveQueryPart` and create an operation from it. | ||||||
| @@ -458,16 +459,16 @@ fn create_query_tree( | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     if optional_words { |     if optional_words { | ||||||
|         optional_word(ctx, authorize_typos, query) |         optional_word(ctx, authorize_typos, query.to_vec()) | ||||||
|     } else { |     } else { | ||||||
|         ngrams(ctx, authorize_typos, query.as_slice()) |         ngrams(ctx, authorize_typos, query) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| type PrimitiveQuery = Vec<PrimitiveQueryPart>; | pub type PrimitiveQuery = Vec<PrimitiveQueryPart>; | ||||||
|  |  | ||||||
| #[derive(Debug, Clone)] | #[derive(Debug, Clone)] | ||||||
| enum PrimitiveQueryPart { | pub enum PrimitiveQueryPart { | ||||||
|     Phrase(Vec<String>), |     Phrase(Vec<String>), | ||||||
|     Word(String, IsPrefix), |     Word(String, IsPrefix), | ||||||
| } | } | ||||||
| @@ -579,11 +580,12 @@ mod test { | |||||||
|             authorize_typos: bool, |             authorize_typos: bool, | ||||||
|             words_limit: Option<usize>, |             words_limit: Option<usize>, | ||||||
|             query: TokenStream, |             query: TokenStream, | ||||||
|         ) -> anyhow::Result<Option<Operation>> |         ) -> anyhow::Result<Option<(Operation, PrimitiveQuery)>> | ||||||
|         { |         { | ||||||
|             let primitive_query = create_primitive_query(query, None, words_limit); |             let primitive_query = create_primitive_query(query, None, words_limit); | ||||||
|             if !primitive_query.is_empty() { |             if !primitive_query.is_empty() { | ||||||
|                 create_query_tree(self, optional_words, authorize_typos, primitive_query).map(Some) |                 let qt = create_query_tree(self, optional_words, authorize_typos, &primitive_query)?; | ||||||
|  |                 Ok(Some((qt, primitive_query))) | ||||||
|             } else { |             } else { | ||||||
|                 Ok(None) |                 Ok(None) | ||||||
|             } |             } | ||||||
| @@ -674,7 +676,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: true, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }), |             Operation::Query(Query { prefix: true, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -694,7 +696,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heyfriends".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -725,7 +727,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "helloworld".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "helloworld".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -770,7 +772,7 @@ mod test { | |||||||
|             ]), |             ]), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -790,7 +792,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "ngrams".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(1, "ngrams".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -816,7 +818,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplitfish".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplitfish".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -836,7 +838,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -875,7 +877,7 @@ mod test { | |||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heymyfriend".to_string()) }), |                 Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "heymyfriend".to_string()) }), | ||||||
|             ]), |             ]), | ||||||
|         ]); |         ]); | ||||||
|         let query_tree = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -891,7 +893,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|         let query_tree = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -925,7 +927,7 @@ mod test { | |||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("friend".to_string()) }), |                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("friend".to_string()) }), | ||||||
|             ]), |             ]), | ||||||
|         ]); |         ]); | ||||||
|         let query_tree = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -944,7 +946,7 @@ mod test { | |||||||
|             ]), |             ]), | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("heyfriends".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("heyfriends".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|         let query_tree = TestContext::default().build(false, false, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, false, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
| @@ -957,7 +959,7 @@ mod test { | |||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
|         let context = TestContext::default(); |         let context = TestContext::default(); | ||||||
|         let query_tree = context.build(false, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = context.build(false, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         let expected = hashset!{ |         let expected = hashset!{ | ||||||
|             ("word",                0, false), |             ("word",                0, false), | ||||||
| @@ -997,7 +999,7 @@ mod test { | |||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
|         let query_tree = TestContext::default().build(false, false, Some(2), tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(false, false, Some(2), tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
|         assert_eq!(expected, query_tree); |         assert_eq!(expected, query_tree); | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user