mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-11-30 17:55:36 +00:00
Compare commits
5 Commits
prototype-
...
set-search
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
68362cf5dd | ||
|
|
70a860a0f0 | ||
|
|
600178c5ab | ||
|
|
dedae94102 | ||
|
|
7ae9a4afee |
@@ -1050,7 +1050,8 @@ pub fn prepare_search<'t>(
|
||||
.map(|x| x as usize)
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
|
||||
|
||||
search.exhaustive_number_hits(is_finite_pagination);
|
||||
search.is_exhaustive_pagination(is_finite_pagination);
|
||||
search.max_total_hits(Some(max_total_hits));
|
||||
search.scoring_strategy(
|
||||
if query.show_ranking_score
|
||||
|| query.show_ranking_score_details
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use super::shared_index_with_documents;
|
||||
use crate::common::Server;
|
||||
use crate::json;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn default_search_should_return_estimated_total_hit() {
|
||||
@@ -133,3 +134,61 @@ async fn ensure_placeholder_search_hit_count_valid() {
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn test_issue_5274() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Document 1",
|
||||
"content": "This is the first."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Document 2",
|
||||
"content": "This is the second doc."
|
||||
}
|
||||
]);
|
||||
let (task, _code) = index.add_documents(documents, None).await;
|
||||
server.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
// Find out the lowest ranking score among the documents
|
||||
let (rep, _status) = index
|
||||
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 2, "showRankingScore": true}))
|
||||
.await;
|
||||
let hits = rep["hits"].as_array().expect("Missing hits array");
|
||||
let second_hit = hits.get(1).expect("Missing second hit");
|
||||
let ranking_score = second_hit
|
||||
.get("_rankingScore")
|
||||
.expect("Missing _rankingScore field")
|
||||
.as_f64()
|
||||
.expect("Expected _rankingScore to be a f64");
|
||||
|
||||
// Search with a ranking score threshold just above and expect to be a single hit
|
||||
let (rep, _status) = index
|
||||
.search_post(json!({"q": "doc", "page": 1, "hitsPerPage": 1, "rankingScoreThreshold": ranking_score + 0.0001}))
|
||||
.await;
|
||||
|
||||
snapshot!(json_string!(rep, {
|
||||
".processingTimeMs" => "[ignored]",
|
||||
}), @r#"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Document 2",
|
||||
"content": "This is the second doc."
|
||||
}
|
||||
],
|
||||
"query": "doc",
|
||||
"processingTimeMs": "[ignored]",
|
||||
"hitsPerPage": 1,
|
||||
"page": 1,
|
||||
"totalPages": 1,
|
||||
"totalHits": 1
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
||||
@@ -209,7 +209,8 @@ impl Search<'_> {
|
||||
terms_matching_strategy: self.terms_matching_strategy,
|
||||
scoring_strategy: ScoringStrategy::Detailed,
|
||||
words_limit: self.words_limit,
|
||||
exhaustive_number_hits: self.exhaustive_number_hits,
|
||||
is_exhaustive_pagination: self.is_exhaustive_pagination,
|
||||
max_total_hits: self.max_total_hits,
|
||||
rtxn: self.rtxn,
|
||||
index: self.index,
|
||||
semantic: self.semantic.clone(),
|
||||
|
||||
@@ -51,7 +51,8 @@ pub struct Search<'a> {
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
words_limit: usize,
|
||||
exhaustive_number_hits: bool,
|
||||
is_exhaustive_pagination: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
rtxn: &'a heed::RoTxn<'a>,
|
||||
index: &'a Index,
|
||||
semantic: Option<SemanticSearch>,
|
||||
@@ -73,7 +74,8 @@ impl<'a> Search<'a> {
|
||||
geo_param: new::GeoSortParameter::default(),
|
||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||
scoring_strategy: Default::default(),
|
||||
exhaustive_number_hits: false,
|
||||
is_exhaustive_pagination: false,
|
||||
max_total_hits: None,
|
||||
words_limit: 10,
|
||||
rtxn,
|
||||
index,
|
||||
@@ -160,8 +162,13 @@ impl<'a> Search<'a> {
|
||||
|
||||
/// Forces the search to exhaustively compute the number of candidates,
|
||||
/// this will increase the search time but allows finite pagination.
|
||||
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
|
||||
self.exhaustive_number_hits = exhaustive_number_hits;
|
||||
pub fn is_exhaustive_pagination(&mut self, is_exhaustive_pagination: bool) -> &mut Search<'a> {
|
||||
self.is_exhaustive_pagination = is_exhaustive_pagination;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn max_total_hits(&mut self, max_total_hits: Option<usize>) -> &mut Search<'a> {
|
||||
self.max_total_hits = max_total_hits;
|
||||
self
|
||||
}
|
||||
|
||||
@@ -224,6 +231,13 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
let mut search_k_div_trees = None;
|
||||
if self.is_exhaustive_pagination {
|
||||
if let Some(max_total_hits) = self.max_total_hits {
|
||||
search_k_div_trees = Some(max_total_hits);
|
||||
}
|
||||
}
|
||||
|
||||
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
|
||||
let PartialSearchResult {
|
||||
located_query_terms,
|
||||
@@ -243,6 +257,8 @@ impl<'a> Search<'a> {
|
||||
&mut ctx,
|
||||
vector,
|
||||
self.scoring_strategy,
|
||||
self.is_exhaustive_pagination,
|
||||
self.max_total_hits,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
@@ -252,6 +268,7 @@ impl<'a> Search<'a> {
|
||||
embedder_name,
|
||||
embedder,
|
||||
*quantized,
|
||||
search_k_div_trees,
|
||||
self.time_budget.clone(),
|
||||
self.ranking_score_threshold,
|
||||
)?,
|
||||
@@ -260,7 +277,8 @@ impl<'a> Search<'a> {
|
||||
self.query.as_deref(),
|
||||
self.terms_matching_strategy,
|
||||
self.scoring_strategy,
|
||||
self.exhaustive_number_hits,
|
||||
self.is_exhaustive_pagination,
|
||||
self.max_total_hits,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
@@ -313,7 +331,8 @@ impl fmt::Debug for Search<'_> {
|
||||
terms_matching_strategy,
|
||||
scoring_strategy,
|
||||
words_limit,
|
||||
exhaustive_number_hits,
|
||||
is_exhaustive_pagination,
|
||||
max_total_hits,
|
||||
rtxn: _,
|
||||
index: _,
|
||||
semantic,
|
||||
@@ -332,7 +351,8 @@ impl fmt::Debug for Search<'_> {
|
||||
.field("searchable_attributes", searchable_attributes)
|
||||
.field("terms_matching_strategy", terms_matching_strategy)
|
||||
.field("scoring_strategy", scoring_strategy)
|
||||
.field("exhaustive_number_hits", exhaustive_number_hits)
|
||||
.field("is_exhaustive_pagination", is_exhaustive_pagination)
|
||||
.field("max_total_hits", max_total_hits)
|
||||
.field("words_limit", words_limit)
|
||||
.field(
|
||||
"semantic.embedder_name",
|
||||
|
||||
@@ -32,6 +32,8 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
logger: &mut dyn SearchLogger<Q>,
|
||||
time_budget: TimeBudget,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
) -> Result<BucketSortOutput> {
|
||||
logger.initial_query(query);
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
@@ -159,7 +161,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
};
|
||||
}
|
||||
|
||||
while valid_docids.len() < length {
|
||||
let max_total_hits = max_total_hits.unwrap_or(usize::MAX);
|
||||
while valid_docids.len() < length
|
||||
|| (exhaustive_number_hits
|
||||
&& ranking_score_threshold.is_some()
|
||||
&& valid_docids.len() < max_total_hits)
|
||||
{
|
||||
if time_budget.exceeded() {
|
||||
loop {
|
||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
|
||||
@@ -510,6 +510,7 @@ mod tests {
|
||||
crate::TermsMatchingStrategy::default(),
|
||||
crate::score_details::ScoringStrategy::Skip,
|
||||
false,
|
||||
None,
|
||||
universe,
|
||||
&None,
|
||||
&None,
|
||||
|
||||
@@ -377,6 +377,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
embedder_name: &str,
|
||||
embedder: &Embedder,
|
||||
quantized: bool,
|
||||
search_k_div_trees: Option<usize>,
|
||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||
// query graph search
|
||||
|
||||
@@ -405,6 +406,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
search_k_div_trees,
|
||||
)?;
|
||||
ranking_rules.push(Box::new(vector_sort));
|
||||
vector = true;
|
||||
@@ -626,6 +628,8 @@ pub fn execute_vector_search(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
vector: &[f32],
|
||||
scoring_strategy: ScoringStrategy,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
@@ -635,6 +639,7 @@ pub fn execute_vector_search(
|
||||
embedder_name: &str,
|
||||
embedder: &Embedder,
|
||||
quantized: bool,
|
||||
search_k_div_trees: Option<usize>,
|
||||
time_budget: TimeBudget,
|
||||
ranking_score_threshold: Option<f64>,
|
||||
) -> Result<PartialSearchResult> {
|
||||
@@ -651,6 +656,7 @@ pub fn execute_vector_search(
|
||||
embedder_name,
|
||||
embedder,
|
||||
quantized,
|
||||
search_k_div_trees,
|
||||
)?;
|
||||
|
||||
let mut placeholder_search_logger = logger::DefaultSearchLogger;
|
||||
@@ -669,6 +675,8 @@ pub fn execute_vector_search(
|
||||
placeholder_search_logger,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
)?;
|
||||
|
||||
Ok(PartialSearchResult {
|
||||
@@ -689,6 +697,7 @@ pub fn execute_search(
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
exhaustive_number_hits: bool,
|
||||
max_total_hits: Option<usize>,
|
||||
mut universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
@@ -825,6 +834,8 @@ pub fn execute_search(
|
||||
query_graph_logger,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
)?
|
||||
} else {
|
||||
let ranking_rules =
|
||||
@@ -841,6 +852,8 @@ pub fn execute_search(
|
||||
placeholder_search_logger,
|
||||
time_budget,
|
||||
ranking_score_threshold,
|
||||
exhaustive_number_hits,
|
||||
max_total_hits,
|
||||
)?
|
||||
};
|
||||
|
||||
|
||||
@@ -572,7 +572,7 @@ fn test_distinct_all_candidates() {
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||
s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
|
||||
s.exhaustive_number_hits(true);
|
||||
s.is_exhaustive_pagination(true);
|
||||
|
||||
let SearchResult { documents_ids, candidates, .. } = s.execute().unwrap();
|
||||
let candidates = candidates.iter().collect::<Vec<_>>();
|
||||
|
||||
@@ -18,9 +18,11 @@ pub struct VectorSort<Q: RankingRuleQueryTrait> {
|
||||
distribution_shift: Option<DistributionShift>,
|
||||
embedder_index: u8,
|
||||
quantized: bool,
|
||||
search_k_div_trees: Option<usize>,
|
||||
}
|
||||
|
||||
impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn new(
|
||||
ctx: &SearchContext<'_>,
|
||||
target: Vec<f32>,
|
||||
@@ -29,6 +31,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
embedder_name: &str,
|
||||
embedder: &Embedder,
|
||||
quantized: bool,
|
||||
search_k_div_trees: Option<usize>,
|
||||
) -> Result<Self> {
|
||||
let embedder_index = ctx
|
||||
.index
|
||||
@@ -42,6 +45,7 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
vector_candidates,
|
||||
cached_sorted_docids: Default::default(),
|
||||
limit,
|
||||
search_k_div_trees,
|
||||
distribution_shift: embedder.distribution(),
|
||||
embedder_index,
|
||||
quantized,
|
||||
@@ -57,7 +61,13 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
|
||||
|
||||
let before = Instant::now();
|
||||
let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
|
||||
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
|
||||
let results = reader.nns_by_vector(
|
||||
ctx.txn,
|
||||
target,
|
||||
self.limit,
|
||||
self.search_k_div_trees,
|
||||
Some(vector_candidates),
|
||||
)?;
|
||||
self.cached_sorted_docids = results.into_iter();
|
||||
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
|
||||
total_time: before.elapsed(),
|
||||
|
||||
@@ -483,12 +483,20 @@ impl ArroyWrapper {
|
||||
rtxn: &RoTxn,
|
||||
vector: &[f32],
|
||||
limit: usize,
|
||||
search_k_div_trees: Option<usize>,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
if self.quantized {
|
||||
self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter)
|
||||
self._nns_by_vector(
|
||||
rtxn,
|
||||
self.quantized_db(),
|
||||
vector,
|
||||
limit,
|
||||
search_k_div_trees,
|
||||
filter,
|
||||
)
|
||||
} else {
|
||||
self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter)
|
||||
self._nns_by_vector(rtxn, self.angular_db(), vector, limit, search_k_div_trees, filter)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -498,6 +506,7 @@ impl ArroyWrapper {
|
||||
db: arroy::Database<D>,
|
||||
vector: &[f32],
|
||||
limit: usize,
|
||||
search_k_div_trees: Option<usize>,
|
||||
filter: Option<&RoaringBitmap>,
|
||||
) -> Result<Vec<(ItemId, f32)>, arroy::Error> {
|
||||
let mut results = Vec::new();
|
||||
@@ -509,6 +518,12 @@ impl ArroyWrapper {
|
||||
if reader.item_ids().is_disjoint(filter) {
|
||||
continue;
|
||||
}
|
||||
if let Some(mut search_k) = search_k_div_trees {
|
||||
search_k *= reader.n_trees();
|
||||
if let Ok(search_k) = search_k.try_into() {
|
||||
searcher.search_k(search_k);
|
||||
}
|
||||
}
|
||||
searcher.candidates(filter);
|
||||
}
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ macro_rules! test_distinct {
|
||||
search.query(search::TEST_QUERY);
|
||||
search.limit($limit);
|
||||
search.offset($offset);
|
||||
search.exhaustive_number_hits($exhaustive);
|
||||
search.is_exhaustive_pagination($exhaustive);
|
||||
|
||||
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user