mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	Reorganise initialisation of ranking rules + rename PathsMap -> PathSet
This commit is contained in:
		| @@ -40,12 +40,25 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use super::logger::SearchLogger; | ||||
| use super::ranking_rule_graph::{ | ||||
|     EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, | ||||
|     EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, ProximityGraph, | ||||
| }; | ||||
| use super::small_bitmap::SmallBitmap; | ||||
| use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; | ||||
| use crate::Result; | ||||
|  | ||||
| pub type Proximity = GraphBasedRankingRule<ProximityGraph>; | ||||
| impl Default for GraphBasedRankingRule<ProximityGraph> { | ||||
|     fn default() -> Self { | ||||
|         Self::new("proximity".to_owned()) | ||||
|     } | ||||
| } | ||||
| pub type Typo = GraphBasedRankingRule<TypoGraph>; | ||||
| impl Default for GraphBasedRankingRule<TypoGraph> { | ||||
|     fn default() -> Self { | ||||
|         Self::new("typo".to_owned()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// A generic graph-based ranking rule | ||||
| pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> { | ||||
|     id: String, | ||||
|   | ||||
| @@ -98,7 +98,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger { | ||||
|     fn initial_universe(&mut self, universe: &RoaringBitmap) { | ||||
|         self.initial_universe = Some(universe.clone()); | ||||
|     } | ||||
|     fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<QueryGraph>]) { | ||||
|     fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<QueryGraph>>]) { | ||||
|         self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect()); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -19,7 +19,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> { | ||||
|     fn initial_universe(&mut self, universe: &RoaringBitmap); | ||||
|  | ||||
|     /// Logs the ranking rules used to perform the search query | ||||
|     fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<Q>]); | ||||
|     fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<Q>>]); | ||||
|  | ||||
|     /// Logs the start of a ranking rule's iteration. | ||||
|     fn start_iteration_ranking_rule<'transaction>( | ||||
| @@ -90,7 +90,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger { | ||||
|  | ||||
|     fn initial_universe(&mut self, _universe: &RoaringBitmap) {} | ||||
|  | ||||
|     fn ranking_rules(&mut self, _rr: &[&mut dyn RankingRule<Q>]) {} | ||||
|     fn ranking_rules(&mut self, _rr: &[Box<dyn RankingRule<Q>>]) {} | ||||
|  | ||||
|     fn start_iteration_ranking_rule<'transaction>( | ||||
|         &mut self, | ||||
|   | ||||
| @@ -17,7 +17,7 @@ mod words; | ||||
|  | ||||
| pub use logger::{DefaultSearchLogger, SearchLogger}; | ||||
|  | ||||
| use std::collections::BTreeSet; | ||||
| use std::collections::{BTreeSet, HashSet}; | ||||
|  | ||||
| use charabia::Tokenize; | ||||
| use db_cache::DatabaseCache; | ||||
| @@ -28,10 +28,10 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use self::interner::Interner; | ||||
| use self::query_term::{Phrase, WordDerivations}; | ||||
| use self::ranking_rules::PlaceholderQuery; | ||||
| use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache}; | ||||
| use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule; | ||||
| use crate::search::new::graph_based_ranking_rule::{Proximity, Typo}; | ||||
| use crate::search::new::query_term::located_query_terms_from_string; | ||||
| use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph}; | ||||
| use crate::search::new::words::Words; | ||||
| use crate::{Filter, Index, Result, TermsMatchingStrategy}; | ||||
|  | ||||
| @@ -88,7 +88,9 @@ fn resolve_maximally_reduced_query_graph<'search>( | ||||
|         TermsMatchingStrategy::All => vec![], | ||||
|     }; | ||||
|     // don't remove the first term | ||||
|     positions_to_remove.remove(0); | ||||
|     if !positions_to_remove.is_empty() { | ||||
|         positions_to_remove.remove(0); | ||||
|     } | ||||
|     loop { | ||||
|         if positions_to_remove.is_empty() { | ||||
|             break; | ||||
| @@ -102,48 +104,172 @@ fn resolve_maximally_reduced_query_graph<'search>( | ||||
|  | ||||
|     Ok(docids) | ||||
| } | ||||
| fn get_ranking_rules_for_placeholder_search<'search>( | ||||
|     ctx: &SearchContext<'search>, | ||||
| ) -> Result<Vec<Box<dyn RankingRule<'search, PlaceholderQuery>>>> { | ||||
|     // let sort = false; | ||||
|     // let mut asc = HashSet::new(); | ||||
|     // let mut desc = HashSet::new(); | ||||
|     let /*mut*/ ranking_rules: Vec<Box<dyn RankingRule<PlaceholderQuery>>> = vec![]; | ||||
|     let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; | ||||
|     for rr in settings_ranking_rules { | ||||
|         // Nothing is pushed yet: Words, Typo, Attribute, Proximity and Exactness are skipped; Sort, Asc and Desc panic via todo!() | ||||
|         match rr { | ||||
|             crate::Criterion::Words | ||||
|             | crate::Criterion::Typo | ||||
|             | crate::Criterion::Attribute | ||||
|             | crate::Criterion::Proximity | ||||
|             | crate::Criterion::Exactness => continue, | ||||
|             crate::Criterion::Sort => todo!(), | ||||
|             crate::Criterion::Asc(_) => todo!(), | ||||
|             crate::Criterion::Desc(_) => todo!(), | ||||
|         } | ||||
|     } | ||||
|     Ok(ranking_rules) | ||||
| } | ||||
| fn get_ranking_rules_for_query_graph_search<'search>( | ||||
|     ctx: &SearchContext<'search>, | ||||
|     terms_matching_strategy: TermsMatchingStrategy, | ||||
| ) -> Result<Vec<Box<dyn RankingRule<'search, QueryGraph>>>> { | ||||
|     // Turn the index criteria into boxed ranking rules; the flags and sets below guard against pushing the same rule twice | ||||
|     let mut words = false; | ||||
|     let mut typo = false; | ||||
|     let mut proximity = false; | ||||
|     let sort = false; | ||||
|     let attribute = false; | ||||
|     let exactness = false; | ||||
|     let mut asc = HashSet::new(); | ||||
|     let mut desc = HashSet::new(); | ||||
|  | ||||
|     let mut ranking_rules: Vec<Box<dyn RankingRule<QueryGraph>>> = vec![]; | ||||
|     let settings_ranking_rules = ctx.index.criteria(ctx.txn)?; | ||||
|     for rr in settings_ranking_rules { | ||||
|         // Words must come before the first typo, proximity, attribute or exactness rule: push it now if it is still missing | ||||
|         match rr { | ||||
|             crate::Criterion::Typo | ||||
|             | crate::Criterion::Attribute | ||||
|             | crate::Criterion::Proximity | ||||
|             | crate::Criterion::Exactness => { | ||||
|                 if !words { | ||||
|                     ranking_rules.push(Box::new(Words::new(terms_matching_strategy))); | ||||
|                     words = true; | ||||
|                 } | ||||
|             } | ||||
|             _ => {} | ||||
|         } | ||||
|         match rr { | ||||
|             crate::Criterion::Words => { | ||||
|                 if words { | ||||
|                     continue; | ||||
|                 } | ||||
|                 ranking_rules.push(Box::new(Words::new(terms_matching_strategy))); | ||||
|                 words = true; | ||||
|             } | ||||
|             crate::Criterion::Typo => { | ||||
|                 if typo { | ||||
|                     continue; | ||||
|                 } | ||||
|                 typo = true; | ||||
|                 ranking_rules.push(Box::<Typo>::default()); | ||||
|             } | ||||
|             crate::Criterion::Proximity => { | ||||
|                 if proximity { | ||||
|                     continue; | ||||
|                 } | ||||
|                 proximity = true; | ||||
|                 ranking_rules.push(Box::<Proximity>::default()); | ||||
|             } | ||||
|             crate::Criterion::Attribute => { | ||||
|                 if attribute { | ||||
|                     continue; | ||||
|                 } | ||||
|                 todo!(); | ||||
|                 // attribute = false; NOTE(review): presumably meant to become `attribute = true` once implemented; confirm | ||||
|             } | ||||
|             crate::Criterion::Sort => { | ||||
|                 if sort { | ||||
|                     continue; | ||||
|                 } | ||||
|                 todo!(); | ||||
|                 // sort = false; NOTE(review): presumably meant to become `sort = true` once implemented; confirm | ||||
|             } | ||||
|             crate::Criterion::Exactness => { | ||||
|                 if exactness { | ||||
|                     continue; | ||||
|                 } | ||||
|                 todo!(); | ||||
|                 // exactness = false; NOTE(review): presumably meant to become `exactness = true` once implemented; confirm | ||||
|             } | ||||
|             crate::Criterion::Asc(field) => { | ||||
|                 if asc.contains(&field) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 asc.insert(field); | ||||
|                 todo!(); | ||||
|             } | ||||
|             crate::Criterion::Desc(field) => { | ||||
|                 if desc.contains(&field) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 desc.insert(field); | ||||
|                 todo!(); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|     Ok(ranking_rules) | ||||
| } | ||||
|  | ||||
| #[allow(clippy::too_many_arguments)] | ||||
| pub fn execute_search<'search>( | ||||
|     ctx: &mut SearchContext<'search>, | ||||
|     query: &str, | ||||
|     terms_matching_strategy: TermsMatchingStrategy, | ||||
|     filters: Option<Filter>, | ||||
|     from: usize, | ||||
|     length: usize, | ||||
|     logger: &mut dyn SearchLogger<QueryGraph>, | ||||
|     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, | ||||
|     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, | ||||
| ) -> Result<Vec<u32>> { | ||||
|     assert!(!query.is_empty()); | ||||
|     let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?; | ||||
|     let graph = QueryGraph::from_query(ctx, query_terms)?; | ||||
|  | ||||
|     logger.initial_query(&graph); | ||||
|  | ||||
|     let universe = if let Some(filters) = filters { | ||||
|         filters.evaluate(ctx.txn, ctx.index)? | ||||
|     } else { | ||||
|         ctx.index.documents_ids(ctx.txn)? | ||||
|     }; | ||||
|  | ||||
|     let universe = resolve_maximally_reduced_query_graph( | ||||
|         ctx, | ||||
|         &universe, | ||||
|         &graph, | ||||
|         TermsMatchingStrategy::Last, | ||||
|         logger, | ||||
|     )?; | ||||
|     // TODO: create ranking rules here | ||||
|     // TODO: find another way to tell whether this is a placeholder search. | ||||
|     // Counting the graph nodes is not correct: if someone searches for a word | ||||
|     // that does not appear in any document, the word is removed from the graph, | ||||
|     // which leaves only 2 nodes, so the query falls into the placeholder branch below. | ||||
|     // In that case, we should return no results instead. | ||||
|     // | ||||
|     // Should the search be a placeholder search only if there are no tokens? | ||||
|     if graph.nodes.len() > 2 { | ||||
|         let universe = resolve_maximally_reduced_query_graph( | ||||
|             ctx, | ||||
|             &universe, | ||||
|             &graph, | ||||
|             terms_matching_strategy, | ||||
|             query_graph_logger, | ||||
|         )?; | ||||
|  | ||||
|     logger.initial_universe(&universe); | ||||
|  | ||||
|     let words = &mut Words::new(TermsMatchingStrategy::Last); | ||||
|     // let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?; | ||||
|     let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned()); | ||||
|     let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned()); | ||||
|     // TODO: ranking rules given as argument | ||||
|     let ranking_rules: Vec<&mut dyn RankingRule<'search, QueryGraph>> = | ||||
|         vec![words, typo, proximity /*sort*/]; | ||||
|  | ||||
|     bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, logger) | ||||
|         let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?; | ||||
|         bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger) | ||||
|     } else { | ||||
|         let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?; | ||||
|         bucket_sort( | ||||
|             ctx, | ||||
|             ranking_rules, | ||||
|             &PlaceholderQuery, | ||||
|             &universe, | ||||
|             from, | ||||
|             length, | ||||
|             placeholder_search_logger, | ||||
|         ) | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[cfg(test)] | ||||
| @@ -182,10 +308,11 @@ mod tests { | ||||
|         let results = execute_search( | ||||
|             &mut ctx, | ||||
|             "zero config", | ||||
|             TermsMatchingStrategy::Last, | ||||
|             None, | ||||
|             0, | ||||
|             20, | ||||
|             // &mut DefaultSearchLogger, | ||||
|             &mut DefaultSearchLogger, | ||||
|             &mut logger, | ||||
|         ) | ||||
|         .unwrap(); | ||||
| @@ -279,10 +406,11 @@ mod tests { | ||||
|         let results = execute_search( | ||||
|             &mut ctx, | ||||
|             "releases from poison by the government", | ||||
|             TermsMatchingStrategy::Last, | ||||
|             None, | ||||
|             0, | ||||
|             20, | ||||
|             // &mut DefaultSearchLogger, | ||||
|             &mut DefaultSearchLogger, | ||||
|             &mut logger, | ||||
|         ) | ||||
|         .unwrap(); | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| use super::paths_map::PathSet; | ||||
| use super::path_set::PathSet; | ||||
| use crate::search::new::small_bitmap::SmallBitmap; | ||||
|  | ||||
| /// A cache which stores sufficient conditions for a path | ||||
| @@ -10,7 +10,7 @@ pub struct EmptyPathsCache { | ||||
|     pub empty_edges: SmallBitmap, | ||||
|     /// A set of path prefixes that resolve to no documents. | ||||
|     pub empty_prefixes: PathSet, | ||||
|     /// A set of empty couple of edge indexes that resolve to no documents. | ||||
|     /// A set of empty couples of edge indexes that resolve to no documents. | ||||
|     pub empty_couple_edges: Vec<SmallBitmap>, | ||||
| } | ||||
| impl EmptyPathsCache { | ||||
|   | ||||
| @@ -9,7 +9,7 @@ mod build; | ||||
| mod cheapest_paths; | ||||
| mod edge_docids_cache; | ||||
| mod empty_paths_cache; | ||||
| mod paths_map; | ||||
| mod path_set; | ||||
|  | ||||
| /// Implementation of the `proximity` ranking rule | ||||
| mod proximity; | ||||
|   | ||||
| @@ -70,16 +70,15 @@ pub struct RankingRuleOutput<Q> { | ||||
|  | ||||
| pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|     ctx: &mut SearchContext<'search>, | ||||
|     mut ranking_rules: Vec<&mut dyn RankingRule<'search, Q>>, | ||||
|     query_graph: &Q, | ||||
|     mut ranking_rules: Vec<Box<dyn RankingRule<'search, Q>>>, | ||||
|     query: &Q, | ||||
|     universe: &RoaringBitmap, | ||||
|     from: usize, | ||||
|     length: usize, | ||||
|     logger: &mut dyn SearchLogger<Q>, | ||||
| ) -> Result<Vec<u32>> { | ||||
|     logger.initial_query(query_graph); | ||||
|  | ||||
|     logger.ranking_rules(&ranking_rules); | ||||
|     logger.initial_universe(universe); | ||||
|  | ||||
|     let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { | ||||
|         ctx.index.fields_ids_map(ctx.txn)?.id(field) | ||||
| @@ -92,8 +91,8 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|     } | ||||
|  | ||||
|     let ranking_rules_len = ranking_rules.len(); | ||||
|     logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe); | ||||
|     ranking_rules[0].start_iteration(ctx, logger, universe, query_graph)?; | ||||
|     logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); | ||||
|     ranking_rules[0].start_iteration(ctx, logger, universe, query)?; | ||||
|  | ||||
|     let mut ranking_rule_universes: Vec<RoaringBitmap> = | ||||
|         vec![RoaringBitmap::default(); ranking_rules_len]; | ||||
| @@ -109,7 +108,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|             assert!(ranking_rule_universes[cur_ranking_rule_index].is_empty()); | ||||
|             logger.end_iteration_ranking_rule( | ||||
|                 cur_ranking_rule_index, | ||||
|                 ranking_rules[cur_ranking_rule_index], | ||||
|                 ranking_rules[cur_ranking_rule_index].as_ref(), | ||||
|                 &ranking_rule_universes[cur_ranking_rule_index], | ||||
|             ); | ||||
|             ranking_rule_universes[cur_ranking_rule_index].clear(); | ||||
| @@ -149,7 +148,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|                         // then just skip the bucket | ||||
|                         logger.skip_bucket_ranking_rule( | ||||
|                             cur_ranking_rule_index, | ||||
|                             ranking_rules[cur_ranking_rule_index], | ||||
|                             ranking_rules[cur_ranking_rule_index].as_ref(), | ||||
|                             &candidates, | ||||
|                         ); | ||||
|                     } else { | ||||
| @@ -159,7 +158,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|                             all_candidates.split_at(from - cur_offset); | ||||
|                         logger.skip_bucket_ranking_rule( | ||||
|                             cur_ranking_rule_index, | ||||
|                             ranking_rules[cur_ranking_rule_index], | ||||
|                             ranking_rules[cur_ranking_rule_index].as_ref(), | ||||
|                             &skipped_candidates.into_iter().collect(), | ||||
|                         ); | ||||
|                         let candidates = candidates | ||||
| @@ -186,7 +185,6 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|         // anything, just extend the results and go back to the parent ranking rule. | ||||
|         if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 { | ||||
|             maybe_add_to_results!(&ranking_rule_universes[cur_ranking_rule_index]); | ||||
|             ranking_rule_universes[cur_ranking_rule_index].clear(); | ||||
|             back!(); | ||||
|             continue; | ||||
|         } | ||||
| @@ -198,7 +196,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|  | ||||
|         logger.next_bucket_ranking_rule( | ||||
|             cur_ranking_rule_index, | ||||
|             ranking_rules[cur_ranking_rule_index], | ||||
|             ranking_rules[cur_ranking_rule_index].as_ref(), | ||||
|             &ranking_rule_universes[cur_ranking_rule_index], | ||||
|             &next_bucket.candidates, | ||||
|         ); | ||||
| @@ -218,7 +216,7 @@ pub fn bucket_sort<'search, Q: RankingRuleQueryTrait>( | ||||
|         ranking_rule_universes[cur_ranking_rule_index] = next_bucket.candidates.clone(); | ||||
|         logger.start_iteration_ranking_rule( | ||||
|             cur_ranking_rule_index, | ||||
|             ranking_rules[cur_ranking_rule_index], | ||||
|             ranking_rules[cur_ranking_rule_index].as_ref(), | ||||
|             &next_bucket.query, | ||||
|             &ranking_rule_universes[cur_ranking_rule_index], | ||||
|         ); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user