mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-26 00:01:00 +00:00
Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
This commit is contained in:
@ -1051,6 +1051,7 @@ pub fn prepare_search<'t>(
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
search.exhaustive_number_hits(is_finite_pagination);
|
||||
search.max_total_hits(Some(max_total_hits));
|
||||
search.scoring_strategy(
|
||||
if query.show_ranking_score
|
||||
|| query.show_ranking_score_details
|
||||
|
@ -1,6 +1,7 @@
|
||||
use super::shared_index_with_documents;
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn default_search_should_return_estimated_total_hit() {
|
||||
@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() {
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_issue_5274() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Document 1",
|
||||
"content": "This is the first."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Document 2",
|
||||
"content": "This is the second doc."
|
||||
}
|
||||
]);
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Find out the lowest ranking score among the documents
|
||||
let (rep, _status) = index
|
||||
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true}))
|
||||
.await;
|
||||
let hits = rep["hits"].as_array().expect("Missing hits array");
|
||||
let second_hit = hits.get(1).expect("Missing second hit");
|
||||
let ranking_score = second_hit
|
||||
.get("_rankingScore")
|
||||
.expect("Missing _rankingScore field")
|
||||
.as_f64()
|
||||
.expect("Expected _rankingScore to be a f64");
|
||||
|
||||
// Search with a ranking score threshold just above and expect to be a single hit
|
||||
let (rep, _status) = index
|
||||
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001}))
|
||||
.await;
|
||||
|
||||
snapshot!(json_string!(rep, {
|
||||
".processingTimeMs" => "[ignored]",
|
||||
}), @r#"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Document 2",
|
||||
"content": "This is the second doc."
|
||||
}
|
||||
],
|
||||
"query": "doc",
|
||||
"processingTimeMs": "[ignored]",
|
||||
"hitsPerPage": 1,
|
||||
"page": 1,
|
||||
"totalPages": 1,
|
||||
"totalHits": 1
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
@ -210,6 +210,7 @@ impl Search<'_> {
|
||||
scoring_strategy: ScoringStrategy::Detailed,
|
||||
words_limit: self.words_limit,
|
||||
exhaustive_number_hits: self.exhaustive_number_hits,
|
||||
max_total_hits: self.max_total_hits,
|
||||
rtxn: self.rtxn,
|
||||
index: self.index,
|
||||
semantic: self.semantic.clone(),
|
||||
|
@ -52,6 +52,7 @@ pub struct Search<'a> {
|
||||
scoring_strategy: ScoringStrategy,
|
||||
words_limit: usize,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
rtxn: &'a heed::RoTxn<'a>,
|
||||
index: &'a Index,
|
||||
semantic: Option<SemanticSearch>,
|
||||
@ -74,6 +75,7 @@ impl<'a> Search<'a> {
|
||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||
scoring_strategy: Default::default(),
|
||||
exhaustive_number_hits: false,
|
||||
max_total_hits: None,
|
||||
words_limit: 10,
|
||||
rtxn,
|
||||
index,
|
||||
@ -165,6 +167,11 @@ impl<'a> Search<'a> {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn max_total_hits(&mut self, max_total_hits: Option<usize>) -> &mut Search<'a> {
|
||||
self.max_total_hits = max_total_hits;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
|
||||
self.time_budget = time_budget;
|
||||
self
|
||||
@ -243,6 +250,8 @@ impl<'a> Search<'a> {
|
||||
&mut ctx,
|
||||
vector,
|
||||
self.scoring_strategy,
|
||||
self.exhaustive_number_hits,
|
||||
self.max_total_hits,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
@ -261,6 +270,7 @@ impl<'a> Search<'a> {
|
||||
self.terms_matching_strategy,
|
||||
self.scoring_strategy,
|
||||
self.exhaustive_number_hits,
|
||||
self.max_total_hits,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
@ -314,6 +324,7 @@ impl fmt::Debug for Search<'_> {
|
||||
scoring_strategy,
|
||||
words_limit,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
rtxn: _,
|
||||
index: _,
|
||||
semantic,
|
||||
@ -333,6 +344,7 @@ impl fmt::Debug for Search<'_> {
|
||||
.field("terms_matching_strategy", terms_matching_strategy)
|
||||
.field("scoring_strategy", scoring_strategy)
|
||||
.field("exhaustive_number_hits", exhaustive_number_hits)
|
||||
.field("max_total_hits", max_total_hits)
|
||||
.field("words_limit", words_limit)
|
||||
.field(
|
||||
"semantic.embedder_name",
|
||||
|
@ -32,6 +32,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
logger: &mut dyn SearchLogger<Q>,
|
||||
time_budget: TimeBudget,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
) -> Result<BucketSortOutput> {
|
||||
logger.initial_query(query);
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
@ -159,7 +161,13 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
};
|
||||
}
|
||||
|
||||
while valid_docids.len() < length {
|
||||
let max_len_to_evaluate =
|
||||
match (max_total_hits, exhaustive_number_hits && ranking_score_threshold.is_some()) {
|
||||
(Some(max_total_hits), true) => max_total_hits,
|
||||
_ => length,
|
||||
};
|
||||
|
||||
while valid_docids.len() < max_len_to_evaluate {
|
||||
if time_budget.exceeded() {
|
||||
loop {
|
||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
|
@ -510,6 +510,7 @@ mod tests {
|
||||
crate::TermsMatchingStrategy::default(),
|
||||
crate::score_details::ScoringStrategy::Skip,
|
||||
false,
|
||||
None,
|
||||
universe,
|
||||
&None,
|
||||
&None,
|
||||
|
@ -626,6 +626,8 @@ pub fn execute_vector_search(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
vector: &[f32],
|
||||
scoring_strategy: ScoringStrategy,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
@ -669,6 +671,8 @@ pub fn execute_vector_search(
|
||||
placeholder_search_logger,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
)?;
|
||||
|
||||
Ok(PartialSearchResult {
|
||||
@ -689,6 +693,7 @@ pub fn execute_search(
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
mut universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
@ -825,6 +830,8 @@ pub fn execute_search(
|
||||
query_graph_logger,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
)?
|
||||
} else {
|
||||
let ranking_rules =
|
||||
@ -841,6 +848,8 @@ pub fn execute_search(
|
||||
placeholder_search_logger,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
)?
|
||||
};
|
||||
|
||||
|
Reference in New Issue
Block a user