implements a first version of the cutoff without settings

This commit is contained in:
Tamo
2024-03-05 11:21:46 +01:00
parent 10d053cd2f
commit 4a467739cd
11 changed files with 210 additions and 52 deletions

View File

@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::DistributionShift;
use crate::{
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
SearchContext,
SearchContext, TimeBudget,
};
// Building these factories is not free.
@ -43,6 +43,8 @@ pub struct Search<'a> {
index: &'a Index,
distribution_shift: Option<DistributionShift>,
embedder_name: Option<String>,
time_budget: TimeBudget,
}
impl<'a> Search<'a> {
@ -64,6 +66,7 @@ impl<'a> Search<'a> {
index,
distribution_shift: None,
embedder_name: None,
time_budget: TimeBudget::max(),
}
}
@ -143,6 +146,11 @@ impl<'a> Search<'a> {
self
}
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
self.time_budget = time_budget;
self
}
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn);
@ -169,36 +177,43 @@ impl<'a> Search<'a> {
}
let universe = filtered_universe(&ctx, &self.filter)?;
let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
match self.vector.as_ref() {
Some(vector) => execute_vector_search(
&mut ctx,
vector,
self.scoring_strategy,
universe,
&self.sort_criteria,
self.geo_strategy,
self.offset,
self.limit,
self.distribution_shift,
embedder_name,
)?,
None => execute_search(
&mut ctx,
self.query.as_deref(),
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
universe,
&self.sort_criteria,
self.geo_strategy,
self.offset,
self.limit,
Some(self.words_limit),
&mut DefaultSearchLogger,
&mut DefaultSearchLogger,
)?,
};
let PartialSearchResult {
located_query_terms,
candidates,
documents_ids,
document_scores,
degraded,
} = match self.vector.as_ref() {
Some(vector) => execute_vector_search(
&mut ctx,
vector,
self.scoring_strategy,
universe,
&self.sort_criteria,
self.geo_strategy,
self.offset,
self.limit,
self.distribution_shift,
embedder_name,
self.time_budget,
)?,
None => execute_search(
&mut ctx,
self.query.as_deref(),
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
universe,
&self.sort_criteria,
self.geo_strategy,
self.offset,
self.limit,
Some(self.words_limit),
&mut DefaultSearchLogger,
&mut DefaultSearchLogger,
self.time_budget,
)?,
};
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
@ -206,7 +221,7 @@ impl<'a> Search<'a> {
None => MatchingWords::default(),
};
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded })
}
}
@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> {
index: _,
distribution_shift,
embedder_name,
time_budget,
} = self;
f.debug_struct("Search")
.field("query", query)
@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> {
.field("words_limit", words_limit)
.field("distribution_shift", distribution_shift)
.field("embedder_name", embedder_name)
.field("time_bduget", time_budget)
.finish()
}
}
@ -254,6 +271,7 @@ pub struct SearchResult {
pub candidates: RoaringBitmap,
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]