mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 00:31:02 +00:00
Merge pull request #5725 from meilisearch/fix-threshold-overcounting-bug
Fix Total Hits being wrong when rankingScoreThreshold is used
This commit is contained in:
@ -1051,6 +1051,7 @@ pub fn prepare_search<'t>(
|
|||||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||||
|
|
||||||
search.exhaustive_number_hits(is_finite_pagination);
|
search.exhaustive_number_hits(is_finite_pagination);
|
||||||
|
search.max_total_hits(Some(max_total_hits));
|
||||||
search.scoring_strategy(
|
search.scoring_strategy(
|
||||||
if query.show_ranking_score
|
if query.show_ranking_score
|
||||||
|| query.show_ranking_score_details
|
|| query.show_ranking_score_details
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use super::shared_index_with_documents;
|
use super::shared_index_with_documents;
|
||||||
use crate::common::Server;
|
use crate::common::Server;
|
||||||
use crate::json;
|
use crate::json;
|
||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn default_search_should_return_estimated_total_hit() {
|
async fn default_search_should_return_estimated_total_hit() {
|
||||||
@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() {
|
|||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn test_issue_5274() {
|
||||||
|
let server = Server::new_shared();
|
||||||
|
let index = server.unique_index();
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"title": "Document 1",
|
||||||
|
"content": "This is the first."
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"title": "Document 2",
|
||||||
|
"content": "This is the second doc."
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
let (task, _code) = index.add_documents(documents, None).await;
|
||||||
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
// Find out the lowest ranking score among the documents
|
||||||
|
let (rep, _status) = index
|
||||||
|
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true}))
|
||||||
|
.await;
|
||||||
|
let hits = rep["hits"].as_array().expect("Missing hits array");
|
||||||
|
let second_hit = hits.get(1).expect("Missing second hit");
|
||||||
|
let ranking_score = second_hit
|
||||||
|
.get("_rankingScore")
|
||||||
|
.expect("Missing _rankingScore field")
|
||||||
|
.as_f64()
|
||||||
|
.expect("Expected _rankingScore to be a f64");
|
||||||
|
|
||||||
|
// Search with a ranking score threshold just above and expect to be a single hit
|
||||||
|
let (rep, _status) = index
|
||||||
|
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001}))
|
||||||
|
.await;
|
||||||
|
|
||||||
|
snapshot!(json_string!(rep, {
|
||||||
|
".processingTimeMs" => "[ignored]",
|
||||||
|
}), @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"title": "Document 2",
|
||||||
|
"content": "This is the second doc."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "doc",
|
||||||
|
"processingTimeMs": "[ignored]",
|
||||||
|
"hitsPerPage": 1,
|
||||||
|
"page": 1,
|
||||||
|
"totalPages": 1,
|
||||||
|
"totalHits": 1
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
@ -210,6 +210,7 @@ impl Search<'_> {
|
|||||||
scoring_strategy: ScoringStrategy::Detailed,
|
scoring_strategy: ScoringStrategy::Detailed,
|
||||||
words_limit: self.words_limit,
|
words_limit: self.words_limit,
|
||||||
exhaustive_number_hits: self.exhaustive_number_hits,
|
exhaustive_number_hits: self.exhaustive_number_hits,
|
||||||
|
max_total_hits: self.max_total_hits,
|
||||||
rtxn: self.rtxn,
|
rtxn: self.rtxn,
|
||||||
index: self.index,
|
index: self.index,
|
||||||
semantic: self.semantic.clone(),
|
semantic: self.semantic.clone(),
|
||||||
|
@ -52,6 +52,7 @@ pub struct Search<'a> {
|
|||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
words_limit: usize,
|
words_limit: usize,
|
||||||
exhaustive_number_hits: bool,
|
exhaustive_number_hits: bool,
|
||||||
|
max_total_hits: Option<usize>,
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
semantic: Option<SemanticSearch>,
|
semantic: Option<SemanticSearch>,
|
||||||
@ -74,6 +75,7 @@ impl<'a> Search<'a> {
|
|||||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||||
scoring_strategy: Default::default(),
|
scoring_strategy: Default::default(),
|
||||||
exhaustive_number_hits: false,
|
exhaustive_number_hits: false,
|
||||||
|
max_total_hits: None,
|
||||||
words_limit: 10,
|
words_limit: 10,
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
@ -165,6 +167,11 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn max_total_hits(&mut self, max_total_hits: Option<usize>) -> &mut Search<'a> {
|
||||||
|
self.max_total_hits = max_total_hits;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
|
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
|
||||||
self.time_budget = time_budget;
|
self.time_budget = time_budget;
|
||||||
self
|
self
|
||||||
@ -243,6 +250,8 @@ impl<'a> Search<'a> {
|
|||||||
&mut ctx,
|
&mut ctx,
|
||||||
vector,
|
vector,
|
||||||
self.scoring_strategy,
|
self.scoring_strategy,
|
||||||
|
self.exhaustive_number_hits,
|
||||||
|
self.max_total_hits,
|
||||||
universe,
|
universe,
|
||||||
&self.sort_criteria,
|
&self.sort_criteria,
|
||||||
&self.distinct,
|
&self.distinct,
|
||||||
@ -261,6 +270,7 @@ impl<'a> Search<'a> {
|
|||||||
self.terms_matching_strategy,
|
self.terms_matching_strategy,
|
||||||
self.scoring_strategy,
|
self.scoring_strategy,
|
||||||
self.exhaustive_number_hits,
|
self.exhaustive_number_hits,
|
||||||
|
self.max_total_hits,
|
||||||
universe,
|
universe,
|
||||||
&self.sort_criteria,
|
&self.sort_criteria,
|
||||||
&self.distinct,
|
&self.distinct,
|
||||||
@ -314,6 +324,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
scoring_strategy,
|
scoring_strategy,
|
||||||
words_limit,
|
words_limit,
|
||||||
exhaustive_number_hits,
|
exhaustive_number_hits,
|
||||||
|
max_total_hits,
|
||||||
rtxn: _,
|
rtxn: _,
|
||||||
index: _,
|
index: _,
|
||||||
semantic,
|
semantic,
|
||||||
@ -333,6 +344,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
.field("terms_matching_strategy", terms_matching_strategy)
|
.field("terms_matching_strategy", terms_matching_strategy)
|
||||||
.field("scoring_strategy", scoring_strategy)
|
.field("scoring_strategy", scoring_strategy)
|
||||||
.field("exhaustive_number_hits", exhaustive_number_hits)
|
.field("exhaustive_number_hits", exhaustive_number_hits)
|
||||||
|
.field("max_total_hits", max_total_hits)
|
||||||
.field("words_limit", words_limit)
|
.field("words_limit", words_limit)
|
||||||
.field(
|
.field(
|
||||||
"semantic.embedder_name",
|
"semantic.embedder_name",
|
||||||
|
@ -32,6 +32,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
logger: &mut dyn SearchLogger<Q>,
|
logger: &mut dyn SearchLogger<Q>,
|
||||||
time_budget: TimeBudget,
|
time_budget: TimeBudget,
|
||||||
ranking_score_threshold: Option<f64>,
|
ranking_score_threshold: Option<f64>,
|
||||||
|
exhaustive_number_hits: bool,
|
||||||
|
max_total_hits: Option<usize>,
|
||||||
) -> Result<BucketSortOutput> {
|
) -> Result<BucketSortOutput> {
|
||||||
logger.initial_query(query);
|
logger.initial_query(query);
|
||||||
logger.ranking_rules(&ranking_rules);
|
logger.ranking_rules(&ranking_rules);
|
||||||
@ -159,7 +161,13 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
while valid_docids.len() < length {
|
let max_len_to_evaluate =
|
||||||
|
match (max_total_hits, exhaustive_number_hits && ranking_score_threshold.is_some()) {
|
||||||
|
(Some(max_total_hits), true) => max_total_hits,
|
||||||
|
_ => length,
|
||||||
|
};
|
||||||
|
|
||||||
|
while valid_docids.len() < max_len_to_evaluate {
|
||||||
if time_budget.exceeded() {
|
if time_budget.exceeded() {
|
||||||
loop {
|
loop {
|
||||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||||
|
@ -510,6 +510,7 @@ mod tests {
|
|||||||
crate::TermsMatchingStrategy::default(),
|
crate::TermsMatchingStrategy::default(),
|
||||||
crate::score_details::ScoringStrategy::Skip,
|
crate::score_details::ScoringStrategy::Skip,
|
||||||
false,
|
false,
|
||||||
|
None,
|
||||||
universe,
|
universe,
|
||||||
&None,
|
&None,
|
||||||
&None,
|
&None,
|
||||||
|
@ -626,6 +626,8 @@ pub fn execute_vector_search(
|
|||||||
ctx: &mut SearchContext<'_>,
|
ctx: &mut SearchContext<'_>,
|
||||||
vector: &[f32],
|
vector: &[f32],
|
||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
|
exhaustive_number_hits: bool,
|
||||||
|
max_total_hits: Option<usize>,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
distinct: &Option<String>,
|
distinct: &Option<String>,
|
||||||
@ -669,6 +671,8 @@ pub fn execute_vector_search(
|
|||||||
placeholder_search_logger,
|
placeholder_search_logger,
|
||||||
time_budget,
|
time_budget,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
exhaustive_number_hits,
|
||||||
|
max_total_hits,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(PartialSearchResult {
|
Ok(PartialSearchResult {
|
||||||
@ -689,6 +693,7 @@ pub fn execute_search(
|
|||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
exhaustive_number_hits: bool,
|
exhaustive_number_hits: bool,
|
||||||
|
max_total_hits: Option<usize>,
|
||||||
mut universe: RoaringBitmap,
|
mut universe: RoaringBitmap,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
distinct: &Option<String>,
|
distinct: &Option<String>,
|
||||||
@ -825,6 +830,8 @@ pub fn execute_search(
|
|||||||
query_graph_logger,
|
query_graph_logger,
|
||||||
time_budget,
|
time_budget,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
exhaustive_number_hits,
|
||||||
|
max_total_hits,
|
||||||
)?
|
)?
|
||||||
} else {
|
} else {
|
||||||
let ranking_rules =
|
let ranking_rules =
|
||||||
@ -841,6 +848,8 @@ pub fn execute_search(
|
|||||||
placeholder_search_logger,
|
placeholder_search_logger,
|
||||||
time_budget,
|
time_budget,
|
||||||
ranking_score_threshold,
|
ranking_score_threshold,
|
||||||
|
exhaustive_number_hits,
|
||||||
|
max_total_hits,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user