mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
This commit is contained in:
		| @@ -1051,6 +1051,7 @@ pub fn prepare_search<'t>( | |||||||
|         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); |         .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); | ||||||
|  |  | ||||||
|     search.exhaustive_number_hits(is_finite_pagination); |     search.exhaustive_number_hits(is_finite_pagination); | ||||||
|  |     search.max_total_hits(Some(max_total_hits)); | ||||||
|     search.scoring_strategy( |     search.scoring_strategy( | ||||||
|         if query.show_ranking_score |         if query.show_ranking_score | ||||||
|             || query.show_ranking_score_details |             || query.show_ranking_score_details | ||||||
|   | |||||||
| @@ -1,6 +1,7 @@ | |||||||
| use super::shared_index_with_documents; | use super::shared_index_with_documents; | ||||||
| use crate::common::Server; | use crate::common::Server; | ||||||
| use crate::json; | use crate::json; | ||||||
|  | use meili_snap::{json_string, snapshot}; | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn default_search_should_return_estimated_total_hit() { | async fn default_search_should_return_estimated_total_hit() { | ||||||
| @@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() { | |||||||
|             .await; |             .await; | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn test_issue_5274() { | ||||||
|  |     let server = Server::new_shared(); | ||||||
|  |     let index = server.unique_index(); | ||||||
|  |  | ||||||
|  |     let documents = json!([ | ||||||
|  |         { | ||||||
|  |             "id": 1, | ||||||
|  |             "title": "Document 1", | ||||||
|  |             "content": "This is the first." | ||||||
|  |         }, | ||||||
|  |         { | ||||||
|  |             "id": 2, | ||||||
|  |             "title": "Document 2", | ||||||
|  |             "content": "This is the second doc." | ||||||
|  |         } | ||||||
|  |     ]); | ||||||
|  |     let (task, _code) = index.add_documents(documents, None).await; | ||||||
|  |     server.wait_task(task.uid()).await.succeeded(); | ||||||
|  |  | ||||||
|  |     // Find out the lowest ranking score among the documents | ||||||
|  |     let (rep, _status) = index | ||||||
|  |         .search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true})) | ||||||
|  |         .await; | ||||||
|  |     let hits = rep["hits"].as_array().expect("Missing hits array"); | ||||||
|  |     let second_hit = hits.get(1).expect("Missing second hit"); | ||||||
|  |     let ranking_score = second_hit | ||||||
|  |         .get("_rankingScore") | ||||||
|  |         .expect("Missing _rankingScore field") | ||||||
|  |         .as_f64() | ||||||
|  |         .expect("Expected _rankingScore to be a f64"); | ||||||
|  |  | ||||||
|  |     // Search with a ranking score threshold just above and expect to be a single hit | ||||||
|  |     let (rep, _status) = index | ||||||
|  |         .search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001})) | ||||||
|  |         .await; | ||||||
|  |  | ||||||
|  |     snapshot!(json_string!(rep, { | ||||||
|  |         ".processingTimeMs" => "[ignored]", | ||||||
|  |     }), @r#" | ||||||
|  |     { | ||||||
|  |       "hits": [ | ||||||
|  |         { | ||||||
|  |           "id": 2, | ||||||
|  |           "title": "Document 2", | ||||||
|  |           "content": "This is the second doc." | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "query": "doc", | ||||||
|  |       "processingTimeMs": "[ignored]", | ||||||
|  |       "hitsPerPage": 1, | ||||||
|  |       "page": 1, | ||||||
|  |       "totalPages": 1, | ||||||
|  |       "totalHits": 1 | ||||||
|  |     } | ||||||
|  |     "#); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -210,6 +210,7 @@ impl Search<'_> { | |||||||
|             scoring_strategy: ScoringStrategy::Detailed, |             scoring_strategy: ScoringStrategy::Detailed, | ||||||
|             words_limit: self.words_limit, |             words_limit: self.words_limit, | ||||||
|             exhaustive_number_hits: self.exhaustive_number_hits, |             exhaustive_number_hits: self.exhaustive_number_hits, | ||||||
|  |             max_total_hits: self.max_total_hits, | ||||||
|             rtxn: self.rtxn, |             rtxn: self.rtxn, | ||||||
|             index: self.index, |             index: self.index, | ||||||
|             semantic: self.semantic.clone(), |             semantic: self.semantic.clone(), | ||||||
|   | |||||||
| @@ -52,6 +52,7 @@ pub struct Search<'a> { | |||||||
|     scoring_strategy: ScoringStrategy, |     scoring_strategy: ScoringStrategy, | ||||||
|     words_limit: usize, |     words_limit: usize, | ||||||
|     exhaustive_number_hits: bool, |     exhaustive_number_hits: bool, | ||||||
|  |     max_total_hits: Option<usize>, | ||||||
|     rtxn: &'a heed::RoTxn<'a>, |     rtxn: &'a heed::RoTxn<'a>, | ||||||
|     index: &'a Index, |     index: &'a Index, | ||||||
|     semantic: Option<SemanticSearch>, |     semantic: Option<SemanticSearch>, | ||||||
| @@ -74,6 +75,7 @@ impl<'a> Search<'a> { | |||||||
|             terms_matching_strategy: TermsMatchingStrategy::default(), |             terms_matching_strategy: TermsMatchingStrategy::default(), | ||||||
|             scoring_strategy: Default::default(), |             scoring_strategy: Default::default(), | ||||||
|             exhaustive_number_hits: false, |             exhaustive_number_hits: false, | ||||||
|  |             max_total_hits: None, | ||||||
|             words_limit: 10, |             words_limit: 10, | ||||||
|             rtxn, |             rtxn, | ||||||
|             index, |             index, | ||||||
| @@ -165,6 +167,11 @@ impl<'a> Search<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn max_total_hits(&mut self, max_total_hits: Option<usize>) -> &mut Search<'a> { | ||||||
|  |         self.max_total_hits = max_total_hits; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { |     pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { | ||||||
|         self.time_budget = time_budget; |         self.time_budget = time_budget; | ||||||
|         self |         self | ||||||
| @@ -243,6 +250,8 @@ impl<'a> Search<'a> { | |||||||
|                 &mut ctx, |                 &mut ctx, | ||||||
|                 vector, |                 vector, | ||||||
|                 self.scoring_strategy, |                 self.scoring_strategy, | ||||||
|  |                 self.exhaustive_number_hits, | ||||||
|  |                 self.max_total_hits, | ||||||
|                 universe, |                 universe, | ||||||
|                 &self.sort_criteria, |                 &self.sort_criteria, | ||||||
|                 &self.distinct, |                 &self.distinct, | ||||||
| @@ -261,6 +270,7 @@ impl<'a> Search<'a> { | |||||||
|                 self.terms_matching_strategy, |                 self.terms_matching_strategy, | ||||||
|                 self.scoring_strategy, |                 self.scoring_strategy, | ||||||
|                 self.exhaustive_number_hits, |                 self.exhaustive_number_hits, | ||||||
|  |                 self.max_total_hits, | ||||||
|                 universe, |                 universe, | ||||||
|                 &self.sort_criteria, |                 &self.sort_criteria, | ||||||
|                 &self.distinct, |                 &self.distinct, | ||||||
| @@ -314,6 +324,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             scoring_strategy, |             scoring_strategy, | ||||||
|             words_limit, |             words_limit, | ||||||
|             exhaustive_number_hits, |             exhaustive_number_hits, | ||||||
|  |             max_total_hits, | ||||||
|             rtxn: _, |             rtxn: _, | ||||||
|             index: _, |             index: _, | ||||||
|             semantic, |             semantic, | ||||||
| @@ -333,6 +344,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             .field("terms_matching_strategy", terms_matching_strategy) |             .field("terms_matching_strategy", terms_matching_strategy) | ||||||
|             .field("scoring_strategy", scoring_strategy) |             .field("scoring_strategy", scoring_strategy) | ||||||
|             .field("exhaustive_number_hits", exhaustive_number_hits) |             .field("exhaustive_number_hits", exhaustive_number_hits) | ||||||
|  |             .field("max_total_hits", max_total_hits) | ||||||
|             .field("words_limit", words_limit) |             .field("words_limit", words_limit) | ||||||
|             .field( |             .field( | ||||||
|                 "semantic.embedder_name", |                 "semantic.embedder_name", | ||||||
|   | |||||||
| @@ -32,6 +32,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     logger: &mut dyn SearchLogger<Q>, |     logger: &mut dyn SearchLogger<Q>, | ||||||
|     time_budget: TimeBudget, |     time_budget: TimeBudget, | ||||||
|     ranking_score_threshold: Option<f64>, |     ranking_score_threshold: Option<f64>, | ||||||
|  |     exhaustive_number_hits: bool, | ||||||
|  |     max_total_hits: Option<usize>, | ||||||
| ) -> Result<BucketSortOutput> { | ) -> Result<BucketSortOutput> { | ||||||
|     logger.initial_query(query); |     logger.initial_query(query); | ||||||
|     logger.ranking_rules(&ranking_rules); |     logger.ranking_rules(&ranking_rules); | ||||||
| @@ -159,7 +161,13 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     while valid_docids.len() < length { |     let max_len_to_evaluate = | ||||||
|  |         match (max_total_hits, exhaustive_number_hits && ranking_score_threshold.is_some()) { | ||||||
|  |             (Some(max_total_hits), true) => max_total_hits, | ||||||
|  |             _ => length, | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |     while valid_docids.len() < max_len_to_evaluate { | ||||||
|         if time_budget.exceeded() { |         if time_budget.exceeded() { | ||||||
|             loop { |             loop { | ||||||
|                 let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); |                 let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); | ||||||
|   | |||||||
| @@ -510,6 +510,7 @@ mod tests { | |||||||
|                 crate::TermsMatchingStrategy::default(), |                 crate::TermsMatchingStrategy::default(), | ||||||
|                 crate::score_details::ScoringStrategy::Skip, |                 crate::score_details::ScoringStrategy::Skip, | ||||||
|                 false, |                 false, | ||||||
|  |                 None, | ||||||
|                 universe, |                 universe, | ||||||
|                 &None, |                 &None, | ||||||
|                 &None, |                 &None, | ||||||
|   | |||||||
| @@ -626,6 +626,8 @@ pub fn execute_vector_search( | |||||||
|     ctx: &mut SearchContext<'_>, |     ctx: &mut SearchContext<'_>, | ||||||
|     vector: &[f32], |     vector: &[f32], | ||||||
|     scoring_strategy: ScoringStrategy, |     scoring_strategy: ScoringStrategy, | ||||||
|  |     exhaustive_number_hits: bool, | ||||||
|  |     max_total_hits: Option<usize>, | ||||||
|     universe: RoaringBitmap, |     universe: RoaringBitmap, | ||||||
|     sort_criteria: &Option<Vec<AscDesc>>, |     sort_criteria: &Option<Vec<AscDesc>>, | ||||||
|     distinct: &Option<String>, |     distinct: &Option<String>, | ||||||
| @@ -669,6 +671,8 @@ pub fn execute_vector_search( | |||||||
|         placeholder_search_logger, |         placeholder_search_logger, | ||||||
|         time_budget, |         time_budget, | ||||||
|         ranking_score_threshold, |         ranking_score_threshold, | ||||||
|  |         exhaustive_number_hits, | ||||||
|  |         max_total_hits, | ||||||
|     )?; |     )?; | ||||||
|  |  | ||||||
|     Ok(PartialSearchResult { |     Ok(PartialSearchResult { | ||||||
| @@ -689,6 +693,7 @@ pub fn execute_search( | |||||||
|     terms_matching_strategy: TermsMatchingStrategy, |     terms_matching_strategy: TermsMatchingStrategy, | ||||||
|     scoring_strategy: ScoringStrategy, |     scoring_strategy: ScoringStrategy, | ||||||
|     exhaustive_number_hits: bool, |     exhaustive_number_hits: bool, | ||||||
|  |     max_total_hits: Option<usize>, | ||||||
|     mut universe: RoaringBitmap, |     mut universe: RoaringBitmap, | ||||||
|     sort_criteria: &Option<Vec<AscDesc>>, |     sort_criteria: &Option<Vec<AscDesc>>, | ||||||
|     distinct: &Option<String>, |     distinct: &Option<String>, | ||||||
| @@ -825,6 +830,8 @@ pub fn execute_search( | |||||||
|             query_graph_logger, |             query_graph_logger, | ||||||
|             time_budget, |             time_budget, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             exhaustive_number_hits, | ||||||
|  |             max_total_hits, | ||||||
|         )? |         )? | ||||||
|     } else { |     } else { | ||||||
|         let ranking_rules = |         let ranking_rules = | ||||||
| @@ -841,6 +848,8 @@ pub fn execute_search( | |||||||
|             placeholder_search_logger, |             placeholder_search_logger, | ||||||
|             time_budget, |             time_budget, | ||||||
|             ranking_score_threshold, |             ranking_score_threshold, | ||||||
|  |             exhaustive_number_hits, | ||||||
|  |             max_total_hits, | ||||||
|         )? |         )? | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user