diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 31af28902..568ce1e36 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1050,7 +1050,7 @@ pub fn prepare_search<'t>( .map(|x| x as usize) .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); - search.exhaustive_number_hits(is_finite_pagination); + search.is_exhaustive_pagination(is_finite_pagination); search.max_total_hits(Some(max_total_hits)); search.scoring_strategy( if query.show_ranking_score diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index a29b6c4c7..8512dc1ad 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -209,7 +209,7 @@ impl Search<'_> { terms_matching_strategy: self.terms_matching_strategy, scoring_strategy: ScoringStrategy::Detailed, words_limit: self.words_limit, - exhaustive_number_hits: self.exhaustive_number_hits, + is_exhaustive_pagination: self.is_exhaustive_pagination, max_total_hits: self.max_total_hits, rtxn: self.rtxn, index: self.index, diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index c00563af7..a753343e3 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -51,7 +51,7 @@ pub struct Search<'a> { terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, words_limit: usize, - exhaustive_number_hits: bool, + is_exhaustive_pagination: bool, max_total_hits: Option, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, @@ -74,7 +74,7 @@ impl<'a> Search<'a> { geo_param: new::GeoSortParameter::default(), terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), - exhaustive_number_hits: false, + is_exhaustive_pagination: false, max_total_hits: None, words_limit: 10, rtxn, @@ -162,8 +162,8 @@ impl<'a> Search<'a> { /// Forces the search to exhaustively compute the number of candidates, /// this will increase the search time but allows finite pagination. - pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> { - self.exhaustive_number_hits = exhaustive_number_hits; + pub fn is_exhaustive_pagination(&mut self, is_exhaustive_pagination: bool) -> &mut Search<'a> { + self.is_exhaustive_pagination = is_exhaustive_pagination; self } @@ -231,6 +231,13 @@ impl<'a> Search<'a> { } } + let mut search_k_div_trees = None; + if self.is_exhaustive_pagination { + if let Some(max_total_hits) = self.max_total_hits { + search_k_div_trees = Some(max_total_hits); + } + } + let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; let PartialSearchResult { located_query_terms, @@ -250,7 +257,7 @@ impl<'a> Search<'a> { &mut ctx, vector, self.scoring_strategy, - self.exhaustive_number_hits, + self.is_exhaustive_pagination, self.max_total_hits, universe, &self.sort_criteria, @@ -261,6 +268,7 @@ impl<'a> Search<'a> { embedder_name, embedder, *quantized, + search_k_div_trees, self.time_budget.clone(), self.ranking_score_threshold, )?, @@ -269,7 +277,7 @@ impl<'a> Search<'a> { self.query.as_deref(), self.terms_matching_strategy, self.scoring_strategy, - self.exhaustive_number_hits, + self.is_exhaustive_pagination, self.max_total_hits, universe, &self.sort_criteria, @@ -323,7 +331,7 @@ impl fmt::Debug for Search<'_> { terms_matching_strategy, scoring_strategy, words_limit, - exhaustive_number_hits, + is_exhaustive_pagination, max_total_hits, rtxn: _, index: _, @@ -343,7 +351,7 @@ impl fmt::Debug for Search<'_> { .field("searchable_attributes", searchable_attributes) .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) - .field("exhaustive_number_hits", exhaustive_number_hits) + .field("is_exhaustive_pagination", is_exhaustive_pagination) .field("max_total_hits", max_total_hits) .field("words_limit", words_limit) .field( diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 047d08202..691ffebd7 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -377,6 +377,7 @@ fn get_ranking_rules_for_vector<'ctx>( embedder_name: &str, embedder: &Embedder, quantized: bool, + search_k_div_trees: Option, ) -> Result>> { // query graph search @@ -405,6 +406,7 @@ fn get_ranking_rules_for_vector<'ctx>( embedder_name, embedder, quantized, + search_k_div_trees, )?; ranking_rules.push(Box::new(vector_sort)); vector = true; @@ -637,6 +639,7 @@ pub fn execute_vector_search( embedder_name: &str, embedder: &Embedder, quantized: bool, + search_k_div_trees: Option, time_budget: TimeBudget, ranking_score_threshold: Option, ) -> Result { @@ -653,6 +656,7 @@ pub fn execute_vector_search( embedder_name, embedder, quantized, + search_k_div_trees, )?; let mut placeholder_search_logger = logger::DefaultSearchLogger; diff --git a/crates/milli/src/search/new/tests/distinct.rs b/crates/milli/src/search/new/tests/distinct.rs index d3c453957..35de1ca16 100644 --- a/crates/milli/src/search/new/tests/distinct.rs +++ b/crates/milli/src/search/new/tests/distinct.rs @@ -572,7 +572,7 @@ fn test_distinct_all_candidates() { let mut s = Search::new(&txn, &index); s.terms_matching_strategy(TermsMatchingStrategy::Last); s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]); - s.exhaustive_number_hits(true); + s.is_exhaustive_pagination(true); let SearchResult { documents_ids, candidates, .. } = s.execute().unwrap(); let candidates = candidates.iter().collect::>(); diff --git a/crates/milli/src/search/new/vector_sort.rs b/crates/milli/src/search/new/vector_sort.rs index 2c201e899..4e42710f3 100644 --- a/crates/milli/src/search/new/vector_sort.rs +++ b/crates/milli/src/search/new/vector_sort.rs @@ -18,9 +18,11 @@ pub struct VectorSort { distribution_shift: Option, embedder_index: u8, quantized: bool, + search_k_div_trees: Option, } impl VectorSort { + #[allow(clippy::too_many_arguments)] pub fn new( ctx: &SearchContext<'_>, target: Vec, @@ -29,6 +31,7 @@ impl VectorSort { embedder_name: &str, embedder: &Embedder, quantized: bool, + search_k_div_trees: Option, ) -> Result { let embedder_index = ctx .index @@ -42,6 +45,7 @@ impl VectorSort { vector_candidates, cached_sorted_docids: Default::default(), limit, + search_k_div_trees, distribution_shift: embedder.distribution(), embedder_index, quantized, @@ -57,7 +61,13 @@ impl VectorSort { let before = Instant::now(); let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized); - let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?; + let results = reader.nns_by_vector( + ctx.txn, + target, + self.limit, + self.search_k_div_trees, + Some(vector_candidates), + )?; self.cached_sorted_docids = results.into_iter(); *ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats { total_time: before.elapsed(), diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index f64223e41..d45bdbf0d 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -483,12 +483,20 @@ impl ArroyWrapper { rtxn: &RoTxn, vector: &[f32], limit: usize, + search_k_div_trees: Option, filter: Option<&RoaringBitmap>, ) -> Result, arroy::Error> { if self.quantized { - self._nns_by_vector(rtxn, self.quantized_db(), vector, limit, filter) + self._nns_by_vector( + rtxn, + self.quantized_db(), + vector, + limit, + search_k_div_trees, + filter, + ) } else { - self._nns_by_vector(rtxn, self.angular_db(), vector, limit, filter) + self._nns_by_vector(rtxn, self.angular_db(), vector, limit, search_k_div_trees, filter) } } @@ -498,6 +506,7 @@ impl ArroyWrapper { db: arroy::Database, vector: &[f32], limit: usize, + search_k_div_trees: Option, filter: Option<&RoaringBitmap>, ) -> Result, arroy::Error> { let mut results = Vec::new(); @@ -509,6 +518,12 @@ impl ArroyWrapper { if reader.item_ids().is_disjoint(filter) { continue; } + if let Some(mut search_k) = search_k_div_trees { + search_k *= reader.n_trees(); + if let Ok(search_k) = search_k.try_into() { + searcher.search_k(search_k); + } + } searcher.candidates(filter); } diff --git a/crates/milli/tests/search/distinct.rs b/crates/milli/tests/search/distinct.rs index c7fa9befa..f6bcaf902 100644 --- a/crates/milli/tests/search/distinct.rs +++ b/crates/milli/tests/search/distinct.rs @@ -29,7 +29,7 @@ macro_rules! test_distinct { search.query(search::TEST_QUERY); search.limit($limit); search.offset($offset); - search.exhaustive_number_hits($exhaustive); + search.is_exhaustive_pagination($exhaustive); search.terms_matching_strategy(TermsMatchingStrategy::default());