Mirror of https://github.com/meilisearch/meilisearch.git (synced 2025-10-24 20:46:27 +00:00)
			
		
		
		
	Merge forward and backward proximity conditions in proximity graph
This commit is contained in:
		| @@ -303,7 +303,7 @@ mod tests { | ||||
|             let mut ctx = SearchContext::new(&index, &txn); | ||||
|             let results = execute_search( | ||||
|                 &mut ctx, | ||||
|                 "releases from poison by the government", | ||||
|                 "which a the releases from poison by the government", | ||||
|                 // "sun flower s are the best", | ||||
|                 // "zero config", | ||||
|                 TermsMatchingStrategy::Last, | ||||
| @@ -359,7 +359,7 @@ mod tests { | ||||
|         let start = Instant::now(); | ||||
|  | ||||
|         let mut s = Search::new(&txn, &index); | ||||
|         s.query("releases from poison by the government"); | ||||
|         s.query("which a the releases from poison by the government"); | ||||
|         s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|         // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); | ||||
|         let docs = s.execute().unwrap(); | ||||
|   | ||||
| @@ -94,7 +94,7 @@ pub fn build_edges<'ctx>( | ||||
|         )]); | ||||
|     } | ||||
|  | ||||
|     let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new(); | ||||
|     let mut cost_word_pairs = BTreeMap::<u8, Vec<WordPair>>::new(); | ||||
|  | ||||
|     if let Some(right_prefix) = right_term.use_prefix_db { | ||||
|         for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) { | ||||
| @@ -106,7 +106,7 @@ pub fn build_edges<'ctx>( | ||||
|                 right_ngram_length, | ||||
|                 left_word, | ||||
|                 right_prefix, | ||||
|                 &mut cost_proximity_word_pairs, | ||||
|                 &mut cost_word_pairs, | ||||
|                 left_phrase, | ||||
|             )?; | ||||
|         } | ||||
| @@ -129,28 +129,22 @@ pub fn build_edges<'ctx>( | ||||
|                 right_ngram_length, | ||||
|                 left_word, | ||||
|                 right_word, | ||||
|                 &mut cost_proximity_word_pairs, | ||||
|                 &mut cost_word_pairs, | ||||
|                 &[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(), | ||||
|             )?; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let mut new_edges = cost_proximity_word_pairs | ||||
|     let mut new_edges = cost_word_pairs | ||||
|         .into_iter() | ||||
|         .flat_map(|(cost, proximity_word_pairs)| { | ||||
|             let mut edges = vec![]; | ||||
|             for (proximity, word_pairs) in proximity_word_pairs { | ||||
|                 edges.push(( | ||||
|                     cost, | ||||
|                     EdgeCondition::Conditional(conditions_interner.insert( | ||||
|                         ProximityCondition::Pairs { | ||||
|                             pairs: word_pairs.into_boxed_slice(), | ||||
|                             proximity, | ||||
|                         }, | ||||
|                     )), | ||||
|                 )) | ||||
|             } | ||||
|             edges | ||||
|         .map(|(cost, word_pairs)| { | ||||
|             ( | ||||
|                 cost, | ||||
|                 EdgeCondition::Conditional( | ||||
|                     conditions_interner | ||||
|                         .insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }), | ||||
|                 ), | ||||
|             ) | ||||
|         }) | ||||
|         .collect::<Vec<_>>(); | ||||
|     new_edges.push(( | ||||
| @@ -170,7 +164,7 @@ fn add_prefix_edges<'ctx>( | ||||
|     right_ngram_length: usize, | ||||
|     left_word: Interned<String>, | ||||
|     right_prefix: Interned<String>, | ||||
|     cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>, | ||||
|     cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>, | ||||
|     left_phrase: Option<Interned<Phrase>>, | ||||
| ) -> Result<()> { | ||||
|     for proximity in 1..=(8 - right_ngram_length) { | ||||
| @@ -188,16 +182,12 @@ fn add_prefix_edges<'ctx>( | ||||
|             )? | ||||
|             .is_some() | ||||
|         { | ||||
|             cost_proximity_word_pairs | ||||
|                 .entry(cost) | ||||
|                 .or_default() | ||||
|                 .entry(proximity as u8) | ||||
|                 .or_default() | ||||
|                 .push(WordPair::WordPrefix { | ||||
|                     phrases: left_phrase.into_iter().collect(), | ||||
|                     left: left_word, | ||||
|                     right_prefix, | ||||
|                 }); | ||||
|             cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefix { | ||||
|                 phrases: left_phrase.into_iter().collect(), | ||||
|                 left: left_word, | ||||
|                 right_prefix, | ||||
|                 proximity: proximity as u8, | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         // No swapping when computing the proximity between a phrase and a word | ||||
| @@ -213,12 +203,11 @@ fn add_prefix_edges<'ctx>( | ||||
|                 )? | ||||
|                 .is_some() | ||||
|         { | ||||
|             cost_proximity_word_pairs | ||||
|                 .entry(cost) | ||||
|                 .or_default() | ||||
|                 .entry(proximity as u8) | ||||
|                 .or_default() | ||||
|                 .push(WordPair::WordPrefixSwapped { left_prefix: right_prefix, right: left_word }); | ||||
|             cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefixSwapped { | ||||
|                 left_prefix: right_prefix, | ||||
|                 right: left_word, | ||||
|                 proximity: proximity as u8 - 1, | ||||
|             }); | ||||
|         } | ||||
|     } | ||||
|     Ok(()) | ||||
| @@ -232,7 +221,7 @@ fn add_non_prefix_edges<'ctx>( | ||||
|     right_ngram_length: usize, | ||||
|     word1: Interned<String>, | ||||
|     word2: Interned<String>, | ||||
|     cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>, | ||||
|     cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>, | ||||
|     phrases: &[Interned<Phrase>], | ||||
| ) -> Result<()> { | ||||
|     for proximity in 1..=(8 - right_ngram_length) { | ||||
| @@ -248,12 +237,12 @@ fn add_non_prefix_edges<'ctx>( | ||||
|             )? | ||||
|             .is_some() | ||||
|         { | ||||
|             cost_proximity_word_pairs | ||||
|                 .entry(cost) | ||||
|                 .or_default() | ||||
|                 .entry(proximity as u8) | ||||
|                 .or_default() | ||||
|                 .push(WordPair::Words { phrases: phrases.to_vec(), left: word1, right: word2 }); | ||||
|             cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words { | ||||
|                 phrases: phrases.to_vec(), | ||||
|                 left: word1, | ||||
|                 right: word2, | ||||
|                 proximity: proximity as u8, | ||||
|             }); | ||||
|         } | ||||
|         if proximity > 1 | ||||
|             // no swapping when either term is a phrase | ||||
| @@ -269,12 +258,12 @@ fn add_non_prefix_edges<'ctx>( | ||||
|                 )? | ||||
|                 .is_some() | ||||
|         { | ||||
|             cost_proximity_word_pairs | ||||
|                 .entry(cost) | ||||
|                 .or_default() | ||||
|                 .entry(proximity as u8 - 1) | ||||
|                 .or_default() | ||||
|                 .push(WordPair::Words { phrases: vec![], left: word2, right: word1 }); | ||||
|             cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words { | ||||
|                 phrases: vec![], | ||||
|                 left: word2, | ||||
|                 right: word1, | ||||
|                 proximity: proximity as u8 - 1, | ||||
|             }); | ||||
|         } | ||||
|     } | ||||
|     Ok(()) | ||||
|   | ||||
| @@ -18,7 +18,7 @@ pub fn compute_docids<'ctx>( | ||||
|         phrase_interner, | ||||
|         term_interner, | ||||
|     } = ctx; | ||||
|     let (pairs, proximity) = match edge { | ||||
|     let pairs = match edge { | ||||
|         ProximityCondition::Term { term } => { | ||||
|             return term_docids | ||||
|                 .get_query_term_docids( | ||||
| @@ -32,12 +32,12 @@ pub fn compute_docids<'ctx>( | ||||
|                 ) | ||||
|                 .cloned() | ||||
|         } | ||||
|         ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity), | ||||
|         ProximityCondition::Pairs { pairs } => pairs, | ||||
|     }; | ||||
|     let mut pair_docids = RoaringBitmap::new(); | ||||
|     for pair in pairs.iter() { | ||||
|         let pair = match pair { | ||||
|             WordPair::Words { phrases, left, right } => { | ||||
|             WordPair::Words { phrases, left, right, proximity } => { | ||||
|                 let mut docids = db_cache | ||||
|                     .get_word_pair_proximity_docids( | ||||
|                         index, | ||||
| @@ -64,7 +64,7 @@ pub fn compute_docids<'ctx>( | ||||
|                 } | ||||
|                 docids | ||||
|             } | ||||
|             WordPair::WordPrefix { phrases, left, right_prefix } => { | ||||
|             WordPair::WordPrefix { phrases, left, right_prefix, proximity } => { | ||||
|                 let mut docids = db_cache | ||||
|                     .get_word_prefix_pair_proximity_docids( | ||||
|                         index, | ||||
| @@ -91,7 +91,7 @@ pub fn compute_docids<'ctx>( | ||||
|                 } | ||||
|                 docids | ||||
|             } | ||||
|             WordPair::WordPrefixSwapped { left_prefix, right } => db_cache | ||||
|             WordPair::WordPrefixSwapped { left_prefix, right, proximity } => db_cache | ||||
|                 .get_prefix_word_pair_proximity_docids( | ||||
|                     index, | ||||
|                     txn, | ||||
|   | ||||
| @@ -18,22 +18,25 @@ pub enum WordPair { | ||||
|         phrases: Vec<Interned<Phrase>>, | ||||
|         left: Interned<String>, | ||||
|         right: Interned<String>, | ||||
|         proximity: u8, | ||||
|     }, | ||||
|     WordPrefix { | ||||
|         phrases: Vec<Interned<Phrase>>, | ||||
|         left: Interned<String>, | ||||
|         right_prefix: Interned<String>, | ||||
|         proximity: u8, | ||||
|     }, | ||||
|     WordPrefixSwapped { | ||||
|         left_prefix: Interned<String>, | ||||
|         right: Interned<String>, | ||||
|         proximity: u8, | ||||
|     }, | ||||
| } | ||||
|  | ||||
| #[derive(Clone, PartialEq, Eq, Hash)] | ||||
| pub enum ProximityCondition { | ||||
|     Term { term: Interned<QueryTerm> }, | ||||
|     Pairs { pairs: Box<[WordPair]>, proximity: u8 }, | ||||
|     Pairs { pairs: Box<[WordPair]> }, | ||||
| } | ||||
|  | ||||
| pub enum ProximityGraph {} | ||||
| @@ -46,8 +49,8 @@ impl RankingRuleGraphTrait for ProximityGraph { | ||||
|             ProximityCondition::Term { term } => { | ||||
|                 format!("term {term}") | ||||
|             } | ||||
|             ProximityCondition::Pairs { pairs, proximity } => { | ||||
|                 format!("prox {proximity}, {} pairs", pairs.len()) | ||||
|             ProximityCondition::Pairs { pairs } => { | ||||
|                 format!("pairs {}", pairs.len()) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user