mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 16:06:31 +00:00 
			
		
		
		
	Store the scores for each bucket
Remove optimization where ranking rules are not executed on buckets of a single document when the score needs to be computed
This commit is contained in:
		| @@ -3,14 +3,18 @@ use roaring::RoaringBitmap; | |||||||
| use super::logger::SearchLogger; | use super::logger::SearchLogger; | ||||||
| use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; | use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; | ||||||
| use super::SearchContext; | use super::SearchContext; | ||||||
|  | use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
| use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; | use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; | ||||||
| use crate::Result; | use crate::Result; | ||||||
|  |  | ||||||
| pub struct BucketSortOutput { | pub struct BucketSortOutput { | ||||||
|     pub docids: Vec<u32>, |     pub docids: Vec<u32>, | ||||||
|  |     pub scores: Vec<Vec<ScoreDetails>>, | ||||||
|     pub all_candidates: RoaringBitmap, |     pub all_candidates: RoaringBitmap, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // TODO: would probably be good to regroup some of these inside of a struct? | ||||||
|  | #[allow(clippy::too_many_arguments)] | ||||||
| pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||||
|     ctx: &mut SearchContext<'ctx>, |     ctx: &mut SearchContext<'ctx>, | ||||||
|     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, |     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, | ||||||
| @@ -18,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     universe: &RoaringBitmap, |     universe: &RoaringBitmap, | ||||||
|     from: usize, |     from: usize, | ||||||
|     length: usize, |     length: usize, | ||||||
|  |     scoring_strategy: ScoringStrategy, | ||||||
|     logger: &mut dyn SearchLogger<Q>, |     logger: &mut dyn SearchLogger<Q>, | ||||||
| ) -> Result<BucketSortOutput> { | ) -> Result<BucketSortOutput> { | ||||||
|     logger.initial_query(query); |     logger.initial_query(query); | ||||||
| @@ -31,7 +36,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     if universe.len() < from as u64 { |     if universe.len() < from as u64 { | ||||||
|         return Ok(BucketSortOutput { docids: vec![], all_candidates: universe.clone() }); |         return Ok(BucketSortOutput { | ||||||
|  |             docids: vec![], | ||||||
|  |             scores: vec![], | ||||||
|  |             all_candidates: universe.clone(), | ||||||
|  |         }); | ||||||
|     } |     } | ||||||
|     if ranking_rules.is_empty() { |     if ranking_rules.is_empty() { | ||||||
|         if let Some(distinct_fid) = distinct_fid { |         if let Some(distinct_fid) = distinct_fid { | ||||||
| @@ -49,22 +58,32 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|             } |             } | ||||||
|             let mut all_candidates = universe - excluded; |             let mut all_candidates = universe - excluded; | ||||||
|             all_candidates.extend(results.iter().copied()); |             all_candidates.extend(results.iter().copied()); | ||||||
|             return Ok(BucketSortOutput { docids: results, all_candidates }); |             return Ok(BucketSortOutput { | ||||||
|  |                 scores: vec![Default::default(); results.len()], | ||||||
|  |                 docids: results, | ||||||
|  |                 all_candidates, | ||||||
|  |             }); | ||||||
|         } else { |         } else { | ||||||
|             let docids = universe.iter().skip(from).take(length).collect(); |             let docids: Vec<u32> = universe.iter().skip(from).take(length).collect(); | ||||||
|             return Ok(BucketSortOutput { docids, all_candidates: universe.clone() }); |             return Ok(BucketSortOutput { | ||||||
|  |                 scores: vec![Default::default(); docids.len()], | ||||||
|  |                 docids, | ||||||
|  |                 all_candidates: universe.clone(), | ||||||
|  |             }); | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     let ranking_rules_len = ranking_rules.len(); |     let ranking_rules_len = ranking_rules.len(); | ||||||
|  |  | ||||||
|     logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); |     logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); | ||||||
|  |  | ||||||
|     ranking_rules[0].start_iteration(ctx, logger, universe, query)?; |     ranking_rules[0].start_iteration(ctx, logger, universe, query)?; | ||||||
|  |  | ||||||
|  |     let mut ranking_rule_scores: Vec<ScoreDetails> = vec![]; | ||||||
|  |  | ||||||
|     let mut ranking_rule_universes: Vec<RoaringBitmap> = |     let mut ranking_rule_universes: Vec<RoaringBitmap> = | ||||||
|         vec![RoaringBitmap::default(); ranking_rules_len]; |         vec![RoaringBitmap::default(); ranking_rules_len]; | ||||||
|     ranking_rule_universes[0] = universe.clone(); |     ranking_rule_universes[0] = universe.clone(); | ||||||
|  |  | ||||||
|     let mut cur_ranking_rule_index = 0; |     let mut cur_ranking_rule_index = 0; | ||||||
|  |  | ||||||
|     /// Finish iterating over the current ranking rule, yielding |     /// Finish iterating over the current ranking rule, yielding | ||||||
| @@ -89,11 +108,15 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|             } else { |             } else { | ||||||
|                 cur_ranking_rule_index -= 1; |                 cur_ranking_rule_index -= 1; | ||||||
|             } |             } | ||||||
|  |             if ranking_rule_scores.len() > cur_ranking_rule_index { | ||||||
|  |                 ranking_rule_scores.pop(); | ||||||
|  |             } | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     let mut all_candidates = universe.clone(); |     let mut all_candidates = universe.clone(); | ||||||
|     let mut valid_docids = vec![]; |     let mut valid_docids = vec![]; | ||||||
|  |     let mut valid_scores = vec![]; | ||||||
|     let mut cur_offset = 0usize; |     let mut cur_offset = 0usize; | ||||||
|  |  | ||||||
|     macro_rules! maybe_add_to_results { |     macro_rules! maybe_add_to_results { | ||||||
| @@ -104,21 +127,26 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|                 length, |                 length, | ||||||
|                 logger, |                 logger, | ||||||
|                 &mut valid_docids, |                 &mut valid_docids, | ||||||
|  |                 &mut valid_scores, | ||||||
|                 &mut all_candidates, |                 &mut all_candidates, | ||||||
|                 &mut ranking_rule_universes, |                 &mut ranking_rule_universes, | ||||||
|                 &mut ranking_rules, |                 &mut ranking_rules, | ||||||
|                 cur_ranking_rule_index, |                 cur_ranking_rule_index, | ||||||
|                 &mut cur_offset, |                 &mut cur_offset, | ||||||
|                 distinct_fid, |                 distinct_fid, | ||||||
|  |                 &ranking_rule_scores, | ||||||
|                 $candidates, |                 $candidates, | ||||||
|             )?; |             )?; | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     while valid_docids.len() < length { |     while valid_docids.len() < length { | ||||||
|         // The universe for this bucket is zero or one element, so we don't need to sort |         // The universe for this bucket is empty, or holds a single document and scoring is skipped, | ||||||
|         // anything, just extend the results and go back to the parent ranking rule. |         // so there is nothing to sort: add it to the results and go back to the parent ranking rule. | ||||||
|         if ranking_rule_universes[cur_ranking_rule_index].len() <= 1 { |         if ranking_rule_universes[cur_ranking_rule_index].is_empty() | ||||||
|  |             || (scoring_strategy == ScoringStrategy::Skip | ||||||
|  |                 && ranking_rule_universes[cur_ranking_rule_index].len() == 1) | ||||||
|  |         { | ||||||
|             let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); |             let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); | ||||||
|             maybe_add_to_results!(bucket); |             maybe_add_to_results!(bucket); | ||||||
|             back!(); |             back!(); | ||||||
| @@ -130,6 +158,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|             continue; |             continue; | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|  |         ranking_rule_scores.push(next_bucket.score); | ||||||
|  |  | ||||||
|         logger.next_bucket_ranking_rule( |         logger.next_bucket_ranking_rule( | ||||||
|             cur_ranking_rule_index, |             cur_ranking_rule_index, | ||||||
|             ranking_rules[cur_ranking_rule_index].as_ref(), |             ranking_rules[cur_ranking_rule_index].as_ref(), | ||||||
| @@ -143,10 +173,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|         ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; |         ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates; | ||||||
|  |  | ||||||
|         if cur_ranking_rule_index == ranking_rules_len - 1 |         if cur_ranking_rule_index == ranking_rules_len - 1 | ||||||
|             || next_bucket.candidates.len() <= 1 |             || (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1) | ||||||
|             || cur_offset + (next_bucket.candidates.len() as usize) < from |             || cur_offset + (next_bucket.candidates.len() as usize) < from | ||||||
|         { |         { | ||||||
|             maybe_add_to_results!(next_bucket.candidates); |             maybe_add_to_results!(next_bucket.candidates); | ||||||
|  |             ranking_rule_scores.pop(); | ||||||
|             continue; |             continue; | ||||||
|         } |         } | ||||||
|  |  | ||||||
| @@ -166,7 +197,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|         )?; |         )?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     Ok(BucketSortOutput { docids: valid_docids, all_candidates }) |     Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates }) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` | /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` | ||||||
| @@ -179,14 +210,18 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     logger: &mut dyn SearchLogger<Q>, |     logger: &mut dyn SearchLogger<Q>, | ||||||
|  |  | ||||||
|     valid_docids: &mut Vec<u32>, |     valid_docids: &mut Vec<u32>, | ||||||
|  |     valid_scores: &mut Vec<Vec<ScoreDetails>>, | ||||||
|     all_candidates: &mut RoaringBitmap, |     all_candidates: &mut RoaringBitmap, | ||||||
|  |  | ||||||
|     ranking_rule_universes: &mut [RoaringBitmap], |     ranking_rule_universes: &mut [RoaringBitmap], | ||||||
|     ranking_rules: &mut [BoxRankingRule<'ctx, Q>], |     ranking_rules: &mut [BoxRankingRule<'ctx, Q>], | ||||||
|  |  | ||||||
|     cur_ranking_rule_index: usize, |     cur_ranking_rule_index: usize, | ||||||
|  |  | ||||||
|     cur_offset: &mut usize, |     cur_offset: &mut usize, | ||||||
|  |  | ||||||
|     distinct_fid: Option<u16>, |     distinct_fid: Option<u16>, | ||||||
|  |     ranking_rule_scores: &[ScoreDetails], | ||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
| ) -> Result<()> { | ) -> Result<()> { | ||||||
|     // First apply the distinct rule on the candidates, reducing the universes if necessary |     // First apply the distinct rule on the candidates, reducing the universes if necessary | ||||||
| @@ -231,13 +266,17 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|             let candidates = |             let candidates = | ||||||
|                 candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>(); |                 candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>(); | ||||||
|             logger.add_to_results(&candidates); |             logger.add_to_results(&candidates); | ||||||
|             valid_docids.extend(&candidates); |             valid_docids.extend_from_slice(&candidates); | ||||||
|  |             valid_scores | ||||||
|  |                 .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len())); | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|         // if we have passed the offset already, add some of the documents (up to the limit) |         // if we have passed the offset already, add some of the documents (up to the limit) | ||||||
|         let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>(); |         let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>(); | ||||||
|         logger.add_to_results(&candidates); |         logger.add_to_results(&candidates); | ||||||
|         valid_docids.extend(&candidates); |         valid_docids.extend_from_slice(&candidates); | ||||||
|  |         valid_scores | ||||||
|  |             .extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len())); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     *cur_offset += candidates.len() as usize; |     *cur_offset += candidates.len() as usize; | ||||||
|   | |||||||
| @@ -44,6 +44,7 @@ use self::geo_sort::GeoSort; | |||||||
| pub use self::geo_sort::Strategy as GeoSortStrategy; | pub use self::geo_sort::Strategy as GeoSortStrategy; | ||||||
| use self::graph_based_ranking_rule::Words; | use self::graph_based_ranking_rule::Words; | ||||||
| use self::interner::Interned; | use self::interner::Interned; | ||||||
|  | use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
| use crate::search::new::distinct::apply_distinct_rule; | use crate::search::new::distinct::apply_distinct_rule; | ||||||
| use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; | use crate::{AscDesc, DocumentId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError}; | ||||||
|  |  | ||||||
| @@ -411,7 +412,16 @@ pub fn execute_search( | |||||||
|         universe = |         universe = | ||||||
|             resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; |             resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?; | ||||||
|  |  | ||||||
|         bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)? |         bucket_sort( | ||||||
|  |             ctx, | ||||||
|  |             ranking_rules, | ||||||
|  |             &graph, | ||||||
|  |             &universe, | ||||||
|  |             from, | ||||||
|  |             length, | ||||||
|  |             ScoringStrategy::Skip, | ||||||
|  |             query_graph_logger, | ||||||
|  |         )? | ||||||
|     } else { |     } else { | ||||||
|         let ranking_rules = |         let ranking_rules = | ||||||
|             get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; |             get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; | ||||||
| @@ -422,17 +432,20 @@ pub fn execute_search( | |||||||
|             &universe, |             &universe, | ||||||
|             from, |             from, | ||||||
|             length, |             length, | ||||||
|  |             ScoringStrategy::Skip, | ||||||
|             placeholder_search_logger, |             placeholder_search_logger, | ||||||
|         )? |         )? | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let BucketSortOutput { docids, mut all_candidates } = bucket_sort_output; |     let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output; | ||||||
|  |  | ||||||
|  |     let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; | ||||||
|  |  | ||||||
|     // The candidates are the universe unless the exhaustive number of hits |     // The candidates are the universe unless the exhaustive number of hits | ||||||
|     // is requested and a distinct attribute is set. |     // is requested and a distinct attribute is set. | ||||||
|     if exhaustive_number_hits { |     if exhaustive_number_hits { | ||||||
|         if let Some(f) = ctx.index.distinct_field(ctx.txn)? { |         if let Some(f) = ctx.index.distinct_field(ctx.txn)? { | ||||||
|             if let Some(distinct_fid) = ctx.index.fields_ids_map(ctx.txn)?.id(f) { |             if let Some(distinct_fid) = fields_ids_map.id(f) { | ||||||
|                 all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; |                 all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user