mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	Fix bug in the proximity ranking rule for queries with ngrams
This commit is contained in:
		| @@ -36,6 +36,8 @@ That is we find the documents where either: | ||||
| - OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by` | ||||
| */ | ||||
|  | ||||
| use std::ops::ControlFlow; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::interner::MappedInterner; | ||||
| @@ -263,7 +265,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|                         graph.remove_edges_with_condition(condition); | ||||
|                         // 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore | ||||
|                         edge_docids_cache.cache.remove(&condition); | ||||
|                         return Ok(()); | ||||
|                         return Ok(ControlFlow::Continue(())); | ||||
|                     } | ||||
|                     path_docids &= edge_docids; | ||||
|  | ||||
| @@ -287,14 +289,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase | ||||
|                         } | ||||
|                         // We should maybe instead try to compute: | ||||
|                         // 0th & nth & 1st & n-1th & 2nd & etc... | ||||
|                         return Ok(()); | ||||
|                         return Ok(ControlFlow::Continue(())); | ||||
|                     } | ||||
|                 } | ||||
|                 bucket |= &path_docids; | ||||
|                 // Reduce the size of the universe so that we can more optimistically discard candidate paths | ||||
|                 universe -= path_docids; | ||||
|                 // TODO: if the universe is empty, stop iterating | ||||
|                 Ok(()) | ||||
|  | ||||
|                 if universe.is_empty() { | ||||
|                     Ok(ControlFlow::Break(())) | ||||
|                 } else { | ||||
|                     Ok(ControlFlow::Continue(())) | ||||
|                 } | ||||
|             }, | ||||
|         )?; | ||||
|  | ||||
|   | ||||
| @@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner}; | ||||
| use crate::search::new::query_graph::QueryNodeData; | ||||
| use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm}; | ||||
| use crate::search::new::ranking_rule_graph::{ | ||||
|     DeadEndPathCache, Edge, EdgeCondition, ProximityEdge, ProximityGraph, RankingRuleGraph, | ||||
|     DeadEndPathCache, Edge, EdgeCondition, ProximityCondition, ProximityGraph, RankingRuleGraph, | ||||
|     RankingRuleGraphTrait, TypoEdge, TypoGraph, | ||||
| }; | ||||
| use crate::search::new::small_bitmap::SmallBitmap; | ||||
| @@ -46,7 +46,7 @@ pub enum SearchEvents { | ||||
|         paths: Vec<Vec<u16>>, | ||||
|         empty_paths_cache: DeadEndPathCache<ProximityGraph>, | ||||
|         universe: RoaringBitmap, | ||||
|         distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>, | ||||
|         distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
|     }, | ||||
|     TypoState { | ||||
| @@ -172,7 +172,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger { | ||||
|         paths_map: &[Vec<u16>], | ||||
|         empty_paths_cache: &DeadEndPathCache<ProximityGraph>, | ||||
|         universe: &RoaringBitmap, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
|     ) { | ||||
|         self.events.push(SearchEvents::ProximityState { | ||||
|   | ||||
| @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; | ||||
| use super::interner::MappedInterner; | ||||
| use super::query_graph::QueryNode; | ||||
| use super::ranking_rule_graph::{ | ||||
|     DeadEndPathCache, ProximityEdge, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph, | ||||
|     DeadEndPathCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoEdge, TypoGraph, | ||||
| }; | ||||
| use super::small_bitmap::SmallBitmap; | ||||
| use super::{RankingRule, RankingRuleQueryTrait}; | ||||
| @@ -68,7 +68,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> { | ||||
|         paths: &[Vec<u16>], | ||||
|         empty_paths_cache: &DeadEndPathCache<ProximityGraph>, | ||||
|         universe: &RoaringBitmap, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
|     ); | ||||
|  | ||||
| @@ -139,7 +139,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger { | ||||
|         _paths_map: &[Vec<u16>], | ||||
|         _empty_paths_cache: &DeadEndPathCache<ProximityGraph>, | ||||
|         _universe: &RoaringBitmap, | ||||
|         _distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>, | ||||
|         _distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         _cost: u16, | ||||
|     ) { | ||||
|     } | ||||
|   | ||||
| @@ -303,16 +303,16 @@ mod tests { | ||||
|             let mut ctx = SearchContext::new(&index, &txn); | ||||
|             let results = execute_search( | ||||
|                 &mut ctx, | ||||
|                 // "which a the releases from poison by the government", | ||||
|                 "releases from poison by the government", | ||||
|                 // "sun flower s are the best", | ||||
|                 "zero config", | ||||
|                 // "zero config", | ||||
|                 TermsMatchingStrategy::Last, | ||||
|                 None, | ||||
|                 0, | ||||
|                 20, | ||||
|                 &mut DefaultSearchLogger, | ||||
|                 &mut DefaultSearchLogger, | ||||
|                 //&mut logger, | ||||
|                 // &mut logger, | ||||
|             ) | ||||
|             .unwrap(); | ||||
|  | ||||
| @@ -359,9 +359,9 @@ mod tests { | ||||
|         let start = Instant::now(); | ||||
|  | ||||
|         let mut s = Search::new(&txn, &index); | ||||
|         s.query("which a the releases from poison by the government"); | ||||
|         s.query("releases from poison by the government"); | ||||
|         s.terms_matching_strategy(TermsMatchingStrategy::Last); | ||||
|         s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); | ||||
|         // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); | ||||
|         let docs = s.execute().unwrap(); | ||||
|  | ||||
|         let elapsed = start.elapsed(); | ||||
|   | ||||
| @@ -2,6 +2,7 @@ | ||||
|  | ||||
| use std::collections::btree_map::Entry; | ||||
| use std::collections::{BTreeMap, VecDeque}; | ||||
| use std::ops::ControlFlow; | ||||
|  | ||||
| use super::empty_paths_cache::DeadEndPathCache; | ||||
| use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; | ||||
| @@ -23,7 +24,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         cost: u16, | ||||
|         all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>, | ||||
|         empty_paths_cache: &mut DeadEndPathCache<G>, | ||||
|         mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>, | ||||
|         mut visit: impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<ControlFlow<()>>, | ||||
|     ) -> Result<()> { | ||||
|         let _ = self.visit_paths_of_cost_rec( | ||||
|             from, | ||||
| @@ -43,7 +44,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|         cost: u16, | ||||
|         all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>, | ||||
|         empty_paths_cache: &mut DeadEndPathCache<G>, | ||||
|         visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<()>, | ||||
|         visit: &mut impl FnMut(&[u16], &mut Self, &mut DeadEndPathCache<G>) -> Result<ControlFlow<()>>, | ||||
|         prev_conditions: &mut Vec<u16>, | ||||
|         cur_path: &mut SmallBitmap<G::EdgeCondition>, | ||||
|         forbidden_conditions: &mut SmallBitmap<G::EdgeCondition>, | ||||
| @@ -60,7 +61,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                 EdgeCondition::Unconditional => { | ||||
|                     if edge.dest_node == self.query_graph.end_node { | ||||
|                         any_valid = true; | ||||
|                         visit(prev_conditions, self, empty_paths_cache)?; | ||||
|                         let control_flow = visit(prev_conditions, self, empty_paths_cache)?; | ||||
|                         match control_flow { | ||||
|                             ControlFlow::Continue(_) => {} | ||||
|                             ControlFlow::Break(_) => return Ok(true), | ||||
|                         } | ||||
|                         true | ||||
|                     } else { | ||||
|                         self.visit_paths_of_cost_rec( | ||||
| @@ -101,7 +106,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { | ||||
|                     ); | ||||
|                     let next_any_valid = if edge.dest_node == self.query_graph.end_node { | ||||
|                         any_valid = true; | ||||
|                         visit(prev_conditions, self, empty_paths_cache)?; | ||||
|                         let control_flow = visit(prev_conditions, self, empty_paths_cache)?; | ||||
|                         match control_flow { | ||||
|                             ControlFlow::Continue(_) => {} | ||||
|                             ControlFlow::Break(_) => return Ok(true), | ||||
|                         } | ||||
|                         true | ||||
|                     } else { | ||||
|                         self.visit_paths_of_cost_rec( | ||||
|   | ||||
| @@ -20,7 +20,7 @@ use std::hash::Hash; | ||||
|  | ||||
| pub use edge_docids_cache::EdgeConditionDocIdsCache; | ||||
| pub use empty_paths_cache::DeadEndPathCache; | ||||
| pub use proximity::{ProximityEdge, ProximityGraph}; | ||||
| pub use proximity::{ProximityCondition, ProximityGraph}; | ||||
| use roaring::RoaringBitmap; | ||||
| pub use typo::{TypoEdge, TypoGraph}; | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| #![allow(clippy::too_many_arguments)] | ||||
| use std::collections::BTreeMap; | ||||
|  | ||||
| use super::ProximityEdge; | ||||
| use super::ProximityCondition; | ||||
| use crate::search::new::db_cache::DatabaseCache; | ||||
| use crate::search::new::interner::{DedupInterner, Interned}; | ||||
| use crate::search::new::query_graph::QueryNodeData; | ||||
| @@ -37,10 +37,10 @@ fn first_word_of_term_iter<'t>( | ||||
|  | ||||
| pub fn build_edges<'ctx>( | ||||
|     ctx: &mut SearchContext<'ctx>, | ||||
|     conditions_interner: &mut DedupInterner<ProximityEdge>, | ||||
|     conditions_interner: &mut DedupInterner<ProximityCondition>, | ||||
|     from_node: &QueryNode, | ||||
|     to_node: &QueryNode, | ||||
| ) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> { | ||||
| ) -> Result<Vec<(u8, EdgeCondition<ProximityCondition>)>> { | ||||
|     let SearchContext { | ||||
|         index, | ||||
|         txn, | ||||
| @@ -51,24 +51,33 @@ pub fn build_edges<'ctx>( | ||||
|         term_docids: _, | ||||
|     } = ctx; | ||||
|  | ||||
|     let (left_term, left_end_position) = match &from_node.data { | ||||
|         QueryNodeData::Term(LocatedQueryTerm { value, positions }) => { | ||||
|             (term_interner.get(*value), *positions.end()) | ||||
|         } | ||||
|         QueryNodeData::Deleted => return Ok(vec![]), | ||||
|         QueryNodeData::Start => return Ok(vec![(0, EdgeCondition::Unconditional)]), | ||||
|         QueryNodeData::End => return Ok(vec![]), | ||||
|     }; | ||||
|  | ||||
|     let right_term = match &to_node.data { | ||||
|         QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]), | ||||
|         QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]), | ||||
|         QueryNodeData::Term(term) => term, | ||||
|     }; | ||||
|     let LocatedQueryTerm { value: right_value, positions: right_positions } = right_term; | ||||
|  | ||||
|     let LocatedQueryTerm { value: right_term_interned, positions: right_positions } = right_term; | ||||
|  | ||||
|     let (right_term, right_start_position, right_ngram_length) = | ||||
|         (term_interner.get(*right_value), *right_positions.start(), right_positions.len()); | ||||
|         (term_interner.get(*right_term_interned), *right_positions.start(), right_positions.len()); | ||||
|  | ||||
|     let (left_term, left_end_position) = match &from_node.data { | ||||
|         QueryNodeData::Term(LocatedQueryTerm { value, positions }) => { | ||||
|             (term_interner.get(*value), *positions.end()) | ||||
|         } | ||||
|         QueryNodeData::Deleted => return Ok(vec![]), | ||||
|         QueryNodeData::Start => { | ||||
|             return Ok(vec![( | ||||
|                 (right_ngram_length - 1) as u8, | ||||
|                 EdgeCondition::Conditional( | ||||
|                     conditions_interner | ||||
|                         .insert(ProximityCondition::Term { term: *right_term_interned }), | ||||
|                 ), | ||||
|             )]) | ||||
|         } | ||||
|         QueryNodeData::End => return Ok(vec![]), | ||||
|     }; | ||||
|  | ||||
|     if left_end_position + 1 != right_start_position { | ||||
|         // We want to ignore this pair of terms | ||||
| @@ -77,7 +86,12 @@ pub fn build_edges<'ctx>( | ||||
|         // `flowers` is removed by the `words` ranking rule. | ||||
|         // The remaining query graph represents `the sun .. are beautiful` | ||||
|         // but `sun` and `are` have no proximity condition between them | ||||
|         return Ok(vec![(0, EdgeCondition::Unconditional)]); | ||||
|         return Ok(vec![( | ||||
|             (right_ngram_length - 1) as u8, | ||||
|             EdgeCondition::Conditional( | ||||
|                 conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }), | ||||
|             ), | ||||
|         )]); | ||||
|     } | ||||
|  | ||||
|     let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new(); | ||||
| @@ -121,24 +135,30 @@ pub fn build_edges<'ctx>( | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let mut new_edges = | ||||
|         cost_proximity_word_pairs | ||||
|     let mut new_edges = cost_proximity_word_pairs | ||||
|         .into_iter() | ||||
|         .flat_map(|(cost, proximity_word_pairs)| { | ||||
|             let mut edges = vec![]; | ||||
|             for (proximity, word_pairs) in proximity_word_pairs { | ||||
|                 edges.push(( | ||||
|                     cost, | ||||
|                         EdgeCondition::Conditional(conditions_interner.insert(ProximityEdge { | ||||
|                     EdgeCondition::Conditional(conditions_interner.insert( | ||||
|                         ProximityCondition::Pairs { | ||||
|                             pairs: word_pairs.into_boxed_slice(), | ||||
|                             proximity, | ||||
|                         })), | ||||
|                         }, | ||||
|                     )), | ||||
|                 )) | ||||
|             } | ||||
|             edges | ||||
|         }) | ||||
|         .collect::<Vec<_>>(); | ||||
|     new_edges.push((8 + (right_ngram_length - 1) as u8, EdgeCondition::Unconditional)); | ||||
|     new_edges.push(( | ||||
|         8 + (right_ngram_length - 1) as u8, | ||||
|         EdgeCondition::Conditional( | ||||
|             conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }), | ||||
|         ), | ||||
|     )); | ||||
|     Ok(new_edges) | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -1,16 +1,39 @@ | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::{ProximityEdge, WordPair}; | ||||
| use super::{ProximityCondition, WordPair}; | ||||
| use crate::search::new::SearchContext; | ||||
| use crate::{CboRoaringBitmapCodec, Result}; | ||||
|  | ||||
| pub fn compute_docids<'ctx>( | ||||
|     ctx: &mut SearchContext<'ctx>, | ||||
|     edge: &ProximityEdge, | ||||
|     edge: &ProximityCondition, | ||||
|     universe: &RoaringBitmap, | ||||
| ) -> Result<RoaringBitmap> { | ||||
|     let SearchContext { index, txn, db_cache, word_interner, .. } = ctx; | ||||
|     let ProximityEdge { pairs, proximity } = edge; | ||||
|     let SearchContext { | ||||
|         index, | ||||
|         txn, | ||||
|         db_cache, | ||||
|         word_interner, | ||||
|         term_docids, | ||||
|         phrase_interner, | ||||
|         term_interner, | ||||
|     } = ctx; | ||||
|     let (pairs, proximity) = match edge { | ||||
|         ProximityCondition::Term { term } => { | ||||
|             return term_docids | ||||
|                 .get_query_term_docids( | ||||
|                     index, | ||||
|                     txn, | ||||
|                     db_cache, | ||||
|                     word_interner, | ||||
|                     term_interner, | ||||
|                     phrase_interner, | ||||
|                     *term, | ||||
|                 ) | ||||
|                 .cloned() | ||||
|         } | ||||
|         ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity), | ||||
|     }; | ||||
|     let mut pair_docids = RoaringBitmap::new(); | ||||
|     for pair in pairs.iter() { | ||||
|         let pair = match pair { | ||||
|   | ||||
| @@ -4,15 +4,15 @@ pub mod compute_docids; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::empty_paths_cache::DeadEndPathCache; | ||||
| use super::{EdgeCondition, RankingRuleGraphTrait}; | ||||
| use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; | ||||
| use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; | ||||
| use crate::search::new::logger::SearchLogger; | ||||
| use crate::search::new::query_term::Phrase; | ||||
| use crate::search::new::query_term::{Phrase, QueryTerm}; | ||||
| use crate::search::new::small_bitmap::SmallBitmap; | ||||
| use crate::search::new::{QueryGraph, QueryNode, SearchContext}; | ||||
| use crate::Result; | ||||
|  | ||||
| #[derive(Clone, PartialEq, Eq, Hash)] | ||||
| #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||||
| pub enum WordPair { | ||||
|     Words { | ||||
|         phrases: Vec<Interned<Phrase>>, | ||||
| @@ -31,27 +31,33 @@ pub enum WordPair { | ||||
| } | ||||
|  | ||||
| #[derive(Clone, PartialEq, Eq, Hash)] | ||||
| pub struct ProximityEdge { | ||||
|     pairs: Box<[WordPair]>, | ||||
|     proximity: u8, | ||||
| pub enum ProximityCondition { | ||||
|     Term { term: Interned<QueryTerm> }, | ||||
|     Pairs { pairs: Box<[WordPair]>, proximity: u8 }, | ||||
| } | ||||
|  | ||||
| pub enum ProximityGraph {} | ||||
|  | ||||
| impl RankingRuleGraphTrait for ProximityGraph { | ||||
|     type EdgeCondition = ProximityEdge; | ||||
|     type EdgeCondition = ProximityCondition; | ||||
|  | ||||
|     fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String { | ||||
|         let ProximityEdge { pairs, proximity } = edge; | ||||
|         format!(", prox {proximity}, {} pairs", pairs.len()) | ||||
|         match edge { | ||||
|             ProximityCondition::Term { term } => { | ||||
|                 format!("term {term}") | ||||
|             } | ||||
|             ProximityCondition::Pairs { pairs, proximity } => { | ||||
|                 format!("prox {proximity}, {} pairs", pairs.len()) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn resolve_edge_condition<'ctx>( | ||||
|         ctx: &mut SearchContext<'ctx>, | ||||
|         edge: &Self::EdgeCondition, | ||||
|         condition: &Self::EdgeCondition, | ||||
|         universe: &RoaringBitmap, | ||||
|     ) -> Result<roaring::RoaringBitmap> { | ||||
|         compute_docids::compute_docids(ctx, edge, universe) | ||||
|         compute_docids::compute_docids(ctx, condition, universe) | ||||
|     } | ||||
|  | ||||
|     fn build_edges<'ctx>( | ||||
| @@ -64,11 +70,11 @@ impl RankingRuleGraphTrait for ProximityGraph { | ||||
|     } | ||||
|  | ||||
|     fn log_state( | ||||
|         graph: &super::RankingRuleGraph<Self>, | ||||
|         graph: &RankingRuleGraph<Self>, | ||||
|         paths: &[Vec<u16>], | ||||
|         empty_paths_cache: &DeadEndPathCache<Self>, | ||||
|         universe: &RoaringBitmap, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityEdge>)>, QueryNode>, | ||||
|         distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, | ||||
|         cost: u16, | ||||
|         logger: &mut dyn SearchLogger<QueryGraph>, | ||||
|     ) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user