From 7ae9a4afee3981ee98bfbde8777b0f03f11388f6 Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Tue, 1 Jul 2025 15:42:43 +0200 Subject: [PATCH 1/4] Add a test for issue #5274 --- crates/meilisearch/tests/search/pagination.rs | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/crates/meilisearch/tests/search/pagination.rs b/crates/meilisearch/tests/search/pagination.rs index c0752e7ec..6dd8b3181 100644 --- a/crates/meilisearch/tests/search/pagination.rs +++ b/crates/meilisearch/tests/search/pagination.rs @@ -1,6 +1,7 @@ use super::shared_index_with_documents; use crate::common::Server; use crate::json; +use meili_snap::{json_string, snapshot}; #[actix_rt::test] async fn default_search_should_return_estimated_total_hit() { @@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() { .await; } } + +#[actix_rt::test] +async fn test_issue_5274() { + let server = Server::new_shared(); + let index = server.unique_index(); + + let documents = json!([ + { + "id": 1, + "title": "Document 1", + "content": "This is the first." + }, + { + "id": 2, + "title": "Document 2", + "content": "This is the second doc." + } + ]); + let (task, _code) = index.add_documents(documents, None).await; + server.wait_task(task.uid()).await.succeeded(); + + // Find out the lowest ranking score among the documents + let (rep, _status) = index + .search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true})) + .await; + let hits = rep["hits"].as_array().expect("Missing hits array"); + let second_hit = hits.get(1).expect("Missing second hit"); + let ranking_score = second_hit + .get("_rankingScore") + .expect("Missing _rankingScore field") + .as_f64() + .expect("Expected _rankingScore to be a f64"); + + // Search with a ranking score threshold just above and expect to be a single hit + let (rep, _status) = index + .search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001})) + .await; + + snapshot!(json_string!(rep, { + ".processingTimeMs" => "[ignored]", + }), @r#" + { + "hits": [ + { + "id": 2, + "title": "Document 2", + "content": "This is the second doc." + } + ], + "query": "doc", + "processingTimeMs": "[ignored]", + "hitsPerPage": 1, + "page": 1, + "totalPages": 1, + "totalHits": 1 + } + "#); +} From dedae94102f7960d22befe0bf68edfba40fea3ee Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Tue, 1 Jul 2025 16:22:25 +0200 Subject: [PATCH 2/4] Fix #5274 --- crates/milli/src/search/mod.rs | 1 + crates/milli/src/search/new/bucket_sort.rs | 5 ++++- crates/milli/src/search/new/mod.rs | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 62183afc3..ecb9af852 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -236,6 +236,7 @@ impl<'a> Search<'a> { &mut ctx, vector, self.scoring_strategy, + self.exhaustive_number_hits, universe, &self.sort_criteria, &self.distinct, diff --git a/crates/milli/src/search/new/bucket_sort.rs b/crates/milli/src/search/new/bucket_sort.rs index 3c26cad5c..f4fd62825 100644 --- a/crates/milli/src/search/new/bucket_sort.rs +++ b/crates/milli/src/search/new/bucket_sort.rs @@ -32,6 +32,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( logger: &mut dyn SearchLogger, time_budget: TimeBudget, ranking_score_threshold: Option, + exhaustive_number_hits: bool, ) -> Result { logger.initial_query(query); logger.ranking_rules(&ranking_rules); @@ -159,7 +160,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( }; } - while valid_docids.len() < length { + while valid_docids.len() < length + || (exhaustive_number_hits && ranking_score_threshold.is_some()) + { if time_budget.exceeded() { loop { let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index a65b4076b..2c6fe5c3c 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -626,6 +626,7 @@ pub fn execute_vector_search( ctx: &mut SearchContext<'_>, vector: &[f32], scoring_strategy: ScoringStrategy, + exhaustive_number_hits: bool, universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, @@ -669,6 +670,7 @@ pub fn execute_vector_search( placeholder_search_logger, time_budget, ranking_score_threshold, + exhaustive_number_hits, )?; Ok(PartialSearchResult { @@ -825,6 +827,7 @@ pub fn execute_search( query_graph_logger, time_budget, ranking_score_threshold, + exhaustive_number_hits, )? } else { let ranking_rules = @@ -841,6 +844,7 @@ pub fn execute_search( placeholder_search_logger, time_budget, ranking_score_threshold, + exhaustive_number_hits, )? }; From 600178c5abb02d493937597ba12fa31419c933ab Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Tue, 1 Jul 2025 18:33:09 +0200 Subject: [PATCH 3/4] Still limit to max hits --- crates/meilisearch/src/search/mod.rs | 1 + crates/milli/src/search/hybrid.rs | 1 + crates/milli/src/search/mod.rs | 11 +++++++++++ crates/milli/src/search/new/bucket_sort.rs | 6 +++++- crates/milli/src/search/new/matches/mod.rs | 1 + crates/milli/src/search/new/mod.rs | 5 +++++ 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 5e543c53f..e1cfc542b 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1020,6 +1020,7 @@ pub fn prepare_search<'t>( .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); search.exhaustive_number_hits(is_finite_pagination); + search.max_total_hits(Some(max_total_hits)); search.scoring_strategy( if query.show_ranking_score || query.show_ranking_score_details diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index b63f6288f..5fc228807 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -209,6 +209,7 @@ impl Search<'_> { scoring_strategy: ScoringStrategy::Detailed, words_limit: self.words_limit, exhaustive_number_hits: self.exhaustive_number_hits, + max_total_hits: self.max_total_hits, rtxn: self.rtxn, index: self.index, semantic: self.semantic.clone(), diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index ecb9af852..2192ea9fd 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -51,6 +51,7 @@ pub struct Search<'a> { scoring_strategy: ScoringStrategy, words_limit: usize, exhaustive_number_hits: bool, + max_total_hits: Option, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, semantic: Option, @@ -73,6 +74,7 @@ impl<'a> Search<'a> { terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), exhaustive_number_hits: false, + max_total_hits: None, words_limit: 10, rtxn, index, @@ -163,6 +165,11 @@ impl<'a> Search<'a> { self } + pub fn max_total_hits(&mut self, max_total_hits: Option) -> &mut Search<'a> { + self.max_total_hits = max_total_hits; + self + } + pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { self.time_budget = time_budget; self @@ -237,6 +244,7 @@ impl<'a> Search<'a> { vector, self.scoring_strategy, self.exhaustive_number_hits, + self.max_total_hits, universe, &self.sort_criteria, &self.distinct, @@ -256,6 +264,7 @@ impl<'a> Search<'a> { self.terms_matching_strategy, self.scoring_strategy, self.exhaustive_number_hits, + self.max_total_hits, universe, &self.sort_criteria, &self.distinct, @@ -309,6 +318,7 @@ impl fmt::Debug for Search<'_> { scoring_strategy, words_limit, exhaustive_number_hits, + max_total_hits, rtxn: _, index: _, semantic, @@ -328,6 +338,7 @@ impl fmt::Debug for Search<'_> { .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) .field("exhaustive_number_hits", exhaustive_number_hits) + .field("max_total_hits", max_total_hits) .field("words_limit", words_limit) .field( "semantic.embedder_name", diff --git a/crates/milli/src/search/new/bucket_sort.rs b/crates/milli/src/search/new/bucket_sort.rs index f4fd62825..298983091 100644 --- a/crates/milli/src/search/new/bucket_sort.rs +++ b/crates/milli/src/search/new/bucket_sort.rs @@ -33,6 +33,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( time_budget: TimeBudget, ranking_score_threshold: Option, exhaustive_number_hits: bool, + max_total_hits: Option, ) -> Result { logger.initial_query(query); logger.ranking_rules(&ranking_rules); @@ -160,8 +161,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( }; } + let max_total_hits = max_total_hits.unwrap_or(usize::MAX); while valid_docids.len() < length - || (exhaustive_number_hits && ranking_score_threshold.is_some()) + || (exhaustive_number_hits + && ranking_score_threshold.is_some() + && valid_docids.len() < max_total_hits) { if time_budget.exceeded() { loop { diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index 2d6f2cf17..66f65f5e5 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -510,6 +510,7 @@ mod tests { crate::TermsMatchingStrategy::default(), crate::score_details::ScoringStrategy::Skip, false, + None, universe, &None, &None, diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 2c6fe5c3c..047d08202 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -627,6 +627,7 @@ pub fn execute_vector_search( vector: &[f32], scoring_strategy: ScoringStrategy, exhaustive_number_hits: bool, + max_total_hits: Option, universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, @@ -671,6 +672,7 @@ pub fn execute_vector_search( time_budget, ranking_score_threshold, exhaustive_number_hits, + max_total_hits, )?; Ok(PartialSearchResult { @@ -691,6 +693,7 @@ pub fn execute_search( terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, exhaustive_number_hits: bool, + max_total_hits: Option, mut universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, @@ -828,6 +831,7 @@ pub fn execute_search( time_budget, ranking_score_threshold, exhaustive_number_hits, + max_total_hits, )? } else { let ranking_rules = @@ -845,6 +849,7 @@ pub fn execute_search( time_budget, ranking_score_threshold, exhaustive_number_hits, + max_total_hits, )? }; From d6bd60d569d4a07578c4891542756bd0cf38705a Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Tue, 15 Jul 2025 18:00:37 +0200 Subject: [PATCH 4/4] Apply review suggestions Co-Authored-By: Louis Dureuil --- crates/milli/src/search/new/bucket_sort.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crates/milli/src/search/new/bucket_sort.rs b/crates/milli/src/search/new/bucket_sort.rs index 298983091..645d36e16 100644 --- a/crates/milli/src/search/new/bucket_sort.rs +++ b/crates/milli/src/search/new/bucket_sort.rs @@ -161,12 +161,13 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( }; } - let max_total_hits = max_total_hits.unwrap_or(usize::MAX); - while valid_docids.len() < length - || (exhaustive_number_hits - && ranking_score_threshold.is_some() - && valid_docids.len() < max_total_hits) - { + let max_len_to_evaluate = + match (max_total_hits, exhaustive_number_hits && ranking_score_threshold.is_some()) { + (Some(max_total_hits), true) => max_total_hits, + _ => length, + }; + + while valid_docids.len() < max_len_to_evaluate { if time_budget.exceeded() { loop { let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);