mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Store the scores for each bucket
Remove optimization where ranking rules are not executed on buckets of a single document when the score needs to be computed
This commit is contained in:
		| @@ -3,14 +3,18 @@ use roaring::RoaringBitmap; | ||||
| use super::logger::SearchLogger; | ||||
| use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; | ||||
| use super::SearchContext; | ||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||
| use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; | ||||
| use crate::Result; | ||||
|  | ||||
| pub struct BucketSortOutput { | ||||
|     pub docids: Vec<u32>, | ||||
|     pub scores: Vec<Vec<ScoreDetails>>, | ||||
|     pub all_candidates: RoaringBitmap, | ||||
| } | ||||
|  | ||||
| // TODO: would probably be good to regroup some of these inside of a struct? | ||||
| #[allow(clippy::too_many_arguments)] | ||||
| pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|     ctx: &mut SearchContext<'ctx>, | ||||
|     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, | ||||
| @@ -18,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|     universe: &RoaringBitmap, | ||||
|     from: usize, | ||||
|     length: usize, | ||||
|     scoring_strategy: ScoringStrategy, | ||||
|     logger: &mut dyn SearchLogger<Q>, | ||||
| ) -> Result<BucketSortOutput> { | ||||
|     logger.initial_query(query); | ||||
| @@ -31,7 +36,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|     }; | ||||
|  | ||||
|     if universe.len() < from as u64 { | ||||
|         return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() }); | ||||
|         return Ok(BucketSortOutput { | ||||
|             docids: vec![], | ||||
|             scores: vec![], | ||||
|             all_candidates: universe.clone(), | ||||
|         }); | ||||
|     } | ||||
|     if ranking_rules.is_empty() { | ||||
|         if let Some(distinct_fid) = distinct_fid { | ||||
| @@ -49,22 +58,32 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|             } | ||||
|             let mut all_candidates = universe - excluded; | ||||
|             all_candidates.extend(results.iter().copied()); | ||||
|             return Ok(BucketSortOutput { docids: results, all_candidates }); | ||||
|             return Ok(BucketSortOutput { | ||||
|                 scores: vec![Default::default(); results.len()], | ||||
|                 docids: results, | ||||
|                 all_candidates, | ||||
|             }); | ||||
|         } else { | ||||
|             let docids = universe.iter().skip(from).take(length).collect(); | ||||
|             return Ok(BucketSortOutput { docids, all_candidates: universe.clone() }); | ||||
|             let docids: Vec<u32> = universe.iter().skip(from).take(length).collect(); | ||||
|             return Ok(BucketSortOutput { | ||||
|                 scores: vec![Default::default(); docids.len()], | ||||
|                 docids, | ||||
|                 all_candidates: universe.clone(), | ||||
|             }); | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     let ranking_rules_len = ranking_rules.len(); | ||||
|  | ||||
|     logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); | ||||
|  | ||||
|     ranking_rules[0].start_iteration(ctx, logger, universe, query)?; | ||||
|  | ||||
|     let mut ranking_rule_scores: Vec<ScoreDetails> = vec![]; | ||||
|  | ||||
|     let mut ranking_rule_universes: Vec<RoaringBitmap> = | ||||
|         vec![RoaringBitmap::default(); ranking_rules_len]; | ||||
|     ranking_rule_universes[0] = universe.clone(); | ||||
|  | ||||
|     let mut cur_ranking_rule_index = 0; | ||||
|  | ||||
|     /// Finish iterating over the current ranking rule, yielding | ||||
| @@ -89,11 +108,15 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|             } else { | ||||
|                 cur_ranking_rule_index -= 1; | ||||
|             } | ||||
|             if ranking_rule_scores.len() > cur_ranking_rule_index { | ||||
|                 ranking_rule_scores.pop(); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     let mut all_candidates = universe.clone(); | ||||
|     let mut valid_docids = vec![]; | ||||
|     let mut valid_scores = vec![]; | ||||
|     let mut cur_offset = 0usize; | ||||
|  | ||||
|     macro_rules! maybe_add_to_results { | ||||
| @@ -104,21 +127,26 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|                 length, | ||||
|                 logger, | ||||
|                 &mut valid_docids, | ||||
|                 &mut valid_scores, | ||||
|                 &mut all_candidates, | ||||
|                 &mut ranking_rule_universes, | ||||
|                 &mut ranking_rules, | ||||
|                 cur_ranking_rule_index, | ||||
|                 &mut cur_offset, | ||||
|                 distinct_fid, | ||||
|                 &ranking_rule_scores, | ||||
|                 $candidates, | ||||
|             )?; | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|     while valid_docids.len() < length { | ||||
|         // The universe for this bucket is zero or one element, so we don't need to sort | ||||
|         // anything, just extend the results and go back to the parent ranking rule. | ||||
|         if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 { | ||||
|         // The universe for this bucket is zero, so we don't need to sort | ||||
|         // anything, just go back to the parent ranking rule. | ||||
|         if ranking_rule_universes[cur_ranking_rule_index].is_empty() | ||||
|             || (scoring_strategy == ScoringStrategy::Skip | ||||
|                 && ranking_rule_universes[cur_ranking_rule_index].len() == 1) | ||||
|         { | ||||
|             let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); | ||||
|             maybe_add_to_results!(bucket); | ||||
|             back!(); | ||||
| @@ -130,6 +158,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|             continue; | ||||
|         }; | ||||
|  | ||||
|         ranking_rule_scores.push(next_bucket.score); | ||||
|  | ||||
|         logger.next_bucket_ranking_rule( | ||||
|             cur_ranking_rule_index, | ||||
|             ranking_rules[cur_ranking_rule_index].as_ref(), | ||||
| @@ -143,10 +173,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|         ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; | ||||
|  | ||||
|         if cur_ranking_rule_index == ranking_rules_len - 1 | ||||
|             || next_bucket.candidates.len() <= 1 | ||||
|             || (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1) | ||||
|             || cur_offset + (next_bucket.candidates.len() as usize) < from | ||||
|         { | ||||
|             maybe_add_to_results!(next_bucket.candidates); | ||||
|             ranking_rule_scores.pop(); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
| @@ -166,7 +197,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|         )?; | ||||
|     } | ||||
|  | ||||
|     Ok(BucketSortOutput { docids: valid_docids, all_candidates }) | ||||
|     Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates }) | ||||
| } | ||||
|  | ||||
| /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` | ||||
| @@ -179,14 +210,18 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>( | ||||
|     logger: &mut dyn SearchLogger<Q>, | ||||
|  | ||||
|     valid_docids: &mut Vec<u32>, | ||||
|     valid_scores: &mut Vec<Vec<ScoreDetails>>, | ||||
|     all_candidates: &mut RoaringBitmap, | ||||
|  | ||||
|     ranking_rule_universes: &mut [RoaringBitmap], | ||||
|     ranking_rules: &mut [BoxRankingRule<'ctx, Q>], | ||||
|  | ||||
|     cur_ranking_rule_index: usize, | ||||
|  | ||||
|     cur_offset: &mut usize, | ||||
|  | ||||
|     distinct_fid: Option<u16>, | ||||
|     ranking_rule_scores: &[ScoreDetails], | ||||
|     candidates: RoaringBitmap, | ||||
| ) -> Result<()> { | ||||
|     // First apply the distinct rule on the candidates, reducing the universes if necessary | ||||
| @@ -231,13 +266,17 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>( | ||||
|             let candidates = | ||||
|                 candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>(); | ||||
|             logger.add_to_results(&candidates); | ||||
|             valid_docids.extend(&candidates); | ||||
|             valid_docids.extend_from_slice(&candidates); | ||||
|             valid_scores | ||||
|                 .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len())); | ||||
|         } | ||||
|     } else { | ||||
|         // if we have passed the offset already, add some of the documents (up to the limit) | ||||
|         let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>(); | ||||
|         logger.add_to_results(&candidates); | ||||
|         valid_docids.extend(&candidates); | ||||
|         valid_docids.extend_from_slice(&candidates); | ||||
|         valid_scores | ||||
|             .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len())); | ||||
|     } | ||||
|  | ||||
|     *cur_offset += candidates.len() as usize; | ||||
|   | ||||
| @@ -44,6 +44,7 @@ use self::geo_sort::GeoSort; | ||||
| pub use self::geo_sort::Strategy as GeoSortStrategy; | ||||
| use self::graph_based_ranking_rule::Words; | ||||
| use self::interner::Interned; | ||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||
| use crate::search::new::distinct::apply_distinct_rule; | ||||
| use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; | ||||
|  | ||||
| @@ -411,7 +412,16 @@ pub fn execute_search( | ||||
|         universe = | ||||
|             resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; | ||||
|  | ||||
|         bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)? | ||||
|         bucket_sort( | ||||
|             ctx, | ||||
|             ranking_rules, | ||||
|             &graph, | ||||
|             &universe, | ||||
|             from, | ||||
|             length, | ||||
|             ScoringStrategy::Skip, | ||||
|             query_graph_logger, | ||||
|         )? | ||||
|     } else { | ||||
|         let ranking_rules = | ||||
|             get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; | ||||
| @@ -422,17 +432,20 @@ pub fn execute_search( | ||||
|             &universe, | ||||
|             from, | ||||
|             length, | ||||
|             ScoringStrategy::Skip, | ||||
|             placeholder_search_logger, | ||||
|         )? | ||||
|     }; | ||||
|  | ||||
|     let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output; | ||||
|     let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output; | ||||
|  | ||||
|     let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; | ||||
|  | ||||
|     // The candidates is the universe unless the exhaustive number of hits | ||||
|     // is requested and a distinct attribute is set. | ||||
|     if exhaustive_number_hits { | ||||
|         if let Some(f) = ctx.index.distinct_field(ctx.txn)? { | ||||
|             if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) { | ||||
|             if let Some(distinct_fid) = fields_ids_map.id(f) { | ||||
|                 all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; | ||||
|             } | ||||
|         } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user