Change bucket_sort logic to pass the time budget and allow for retrieving non-blocking buckets

This commit is contained in:
Louis Dureuil
2025-10-15 15:33:45 +02:00
parent 58f30e9d8a
commit b418054ee4

View File

@@ -97,7 +97,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe); logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query)?; ranking_rules[0].start_iteration(ctx, logger, universe, query, &time_budget)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![]; let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
@@ -168,15 +168,36 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
}; };
while valid_docids.len() < max_len_to_evaluate { while valid_docids.len() < max_len_to_evaluate {
if time_budget.exceeded() { // The universe for this bucket is zero, so we don't need to sort
loop { // anything, just go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|| (scoring_strategy == ScoringStrategy::Skip
&& ranking_rule_universes[cur_ranking_rule_index].len() == 1)
{
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
back!();
continue;
}
let next_bucket = if time_budget.exceeded() {
match ranking_rules[cur_ranking_rule_index].non_blocking_next_bucket(
ctx,
logger,
&ranking_rule_universes[cur_ranking_rule_index],
)? {
std::task::Poll::Ready(bucket) => bucket,
std::task::Poll::Pending => {
loop {
let bucket =
std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
ranking_rule_scores.push(ScoreDetails::Skipped); ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to result if their score is below the threshold // remove candidates from the universe without adding them to result if their score is below the threshold
let is_below_threshold = let is_below_threshold =
ranking_score_threshold.is_some_and(|ranking_score_threshold| { ranking_score_threshold.is_some_and(|ranking_score_threshold| {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter()); let current_score =
ScoreDetails::global_score(ranking_rule_scores.iter());
current_score < ranking_score_threshold current_score < ranking_score_threshold
}); });
@@ -203,28 +224,20 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
degraded: true, degraded: true,
}); });
} }
// The universe for this bucket is zero, so we don't need to sort
// anything, just go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|| (scoring_strategy == ScoringStrategy::Skip
&& ranking_rule_universes[cur_ranking_rule_index].len() == 1)
{
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
back!();
continue;
} }
} else {
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket( let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx, ctx,
logger, logger,
&ranking_rule_universes[cur_ranking_rule_index], &ranking_rule_universes[cur_ranking_rule_index],
&time_budget,
)? )?
else { else {
back!(); back!();
continue; continue;
}; };
next_bucket
};
ranking_rule_scores.push(next_bucket.score); ranking_rule_scores.push(next_bucket.score);
@@ -275,6 +288,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger, logger,
&next_bucket.candidates, &next_bucket.candidates,
&next_bucket.query, &next_bucket.query,
&time_budget,
)?; )?;
} }