mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	Merge #4466
4466: Implements the search cutoff r=irevoire a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4488 ## What does this PR do? - Adds a cutoff to the bucket sort after 150ms has been spent - Adds a new setting to customize the default value of 150ms - When the time is exceeded, we exit early with what we had the time to sort - If the cutoff has been reached, the search details are updated with a new `Skip` ranking details for the ranking rules that were skipped - Adds analytics to measure the total number of degraded search requests - Adds the number of degraded search requests to the Prometheus metrics and Grafana dashboard - The cutoff **must not** skip the filters; otherwise, we would leak documents to people who don’t have the right to see them Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		@@ -6,7 +6,7 @@ use std::time::Instant;
 | 
			
		||||
use heed::EnvOpenOptions;
 | 
			
		||||
use milli::{
 | 
			
		||||
    execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
 | 
			
		||||
    SearchLogger, TermsMatchingStrategy,
 | 
			
		||||
    SearchLogger, TermsMatchingStrategy, TimeBudget,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
@@ -65,6 +65,7 @@ fn main() -> Result<(), Box<dyn Error>> {
 | 
			
		||||
                None,
 | 
			
		||||
                &mut DefaultSearchLogger,
 | 
			
		||||
                logger,
 | 
			
		||||
                TimeBudget::max(),
 | 
			
		||||
            )?;
 | 
			
		||||
            if let Some((logger, dir)) = detailed_logger {
 | 
			
		||||
                logger.finish(&mut ctx, Path::new(dir))?;
 | 
			
		||||
 
 | 
			
		||||
@@ -67,6 +67,7 @@ pub mod main_key {
 | 
			
		||||
    pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits";
 | 
			
		||||
    pub const PROXIMITY_PRECISION: &str = "proximity-precision";
 | 
			
		||||
    pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
 | 
			
		||||
    pub const SEARCH_CUTOFF: &str = "search_cutoff";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub mod db_name {
 | 
			
		||||
@@ -1505,6 +1506,18 @@ impl Index {
 | 
			
		||||
            _ => "default".to_owned(),
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> {
 | 
			
		||||
        self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result<Option<u64>> {
 | 
			
		||||
        Ok(self.main.remap_types::<Str, BEU64>().get(rtxn, main_key::SEARCH_CUTOFF)?)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
 | 
			
		||||
        self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF)
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[cfg(test)]
 | 
			
		||||
@@ -2421,6 +2434,7 @@ pub(crate) mod tests {
 | 
			
		||||
            candidates: _,
 | 
			
		||||
            document_scores: _,
 | 
			
		||||
            mut documents_ids,
 | 
			
		||||
            degraded: _,
 | 
			
		||||
        } = search.execute().unwrap();
 | 
			
		||||
        let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
 | 
			
		||||
        documents_ids.sort_unstable();
 | 
			
		||||
 
 | 
			
		||||
@@ -30,6 +30,7 @@ pub mod snapshot_tests;
 | 
			
		||||
 | 
			
		||||
use std::collections::{BTreeMap, HashMap};
 | 
			
		||||
use std::convert::{TryFrom, TryInto};
 | 
			
		||||
use std::fmt;
 | 
			
		||||
use std::hash::BuildHasherDefault;
 | 
			
		||||
 | 
			
		||||
use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
 | 
			
		||||
@@ -104,6 +105,73 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
 | 
			
		||||
 | 
			
		||||
pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
 | 
			
		||||
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
pub struct TimeBudget {
 | 
			
		||||
    started_at: std::time::Instant,
 | 
			
		||||
    budget: std::time::Duration,
 | 
			
		||||
 | 
			
		||||
    /// When testing the time budget, ensuring we did more than iteration of the bucket sort can be useful.
 | 
			
		||||
    /// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`.
 | 
			
		||||
    #[cfg(test)]
 | 
			
		||||
    stop_after: Option<(std::sync::Arc<std::sync::atomic::AtomicUsize>, usize)>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl fmt::Debug for TimeBudget {
 | 
			
		||||
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 | 
			
		||||
        f.debug_struct("TimeBudget")
 | 
			
		||||
            .field("started_at", &self.started_at)
 | 
			
		||||
            .field("budget", &self.budget)
 | 
			
		||||
            .field("left", &(self.budget - self.started_at.elapsed()))
 | 
			
		||||
            .finish()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl Default for TimeBudget {
 | 
			
		||||
    fn default() -> Self {
 | 
			
		||||
        Self::new(std::time::Duration::from_millis(150))
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl TimeBudget {
 | 
			
		||||
    pub fn new(budget: std::time::Duration) -> Self {
 | 
			
		||||
        Self {
 | 
			
		||||
            started_at: std::time::Instant::now(),
 | 
			
		||||
            budget,
 | 
			
		||||
 | 
			
		||||
            #[cfg(test)]
 | 
			
		||||
            stop_after: None,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn max() -> Self {
 | 
			
		||||
        Self::new(std::time::Duration::from_secs(u64::MAX))
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    #[cfg(test)]
 | 
			
		||||
    pub fn with_stop_after(mut self, stop_after: usize) -> Self {
 | 
			
		||||
        use std::sync::atomic::AtomicUsize;
 | 
			
		||||
        use std::sync::Arc;
 | 
			
		||||
 | 
			
		||||
        self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after));
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn exceeded(&self) -> bool {
 | 
			
		||||
        #[cfg(test)]
 | 
			
		||||
        if let Some((current, stop_after)) = &self.stop_after {
 | 
			
		||||
            let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
 | 
			
		||||
            if current >= *stop_after {
 | 
			
		||||
                return true;
 | 
			
		||||
            } else {
 | 
			
		||||
                // if a number has been specified then we ignore entirely the time budget
 | 
			
		||||
                return false;
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        self.started_at.elapsed() > self.budget
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Convert an absolute word position into a relative position.
 | 
			
		||||
// Return the field id of the attribute related to the absolute position
 | 
			
		||||
// and the relative position in the attribute.
 | 
			
		||||
 
 | 
			
		||||
@@ -17,6 +17,9 @@ pub enum ScoreDetails {
 | 
			
		||||
    Sort(Sort),
 | 
			
		||||
    Vector(Vector),
 | 
			
		||||
    GeoSort(GeoSort),
 | 
			
		||||
 | 
			
		||||
    /// Returned when we don't have the time to finish applying all the subsequent ranking-rules
 | 
			
		||||
    Skipped,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Clone, Copy)]
 | 
			
		||||
@@ -50,6 +53,7 @@ impl ScoreDetails {
 | 
			
		||||
            ScoreDetails::Sort(_) => None,
 | 
			
		||||
            ScoreDetails::GeoSort(_) => None,
 | 
			
		||||
            ScoreDetails::Vector(_) => None,
 | 
			
		||||
            ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -97,6 +101,7 @@ impl ScoreDetails {
 | 
			
		||||
            ScoreDetails::Vector(vector) => RankOrValue::Score(
 | 
			
		||||
                vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64),
 | 
			
		||||
            ),
 | 
			
		||||
            ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -256,6 +261,11 @@ impl ScoreDetails {
 | 
			
		||||
                    details_map.insert(vector, details);
 | 
			
		||||
                    order += 1;
 | 
			
		||||
                }
 | 
			
		||||
                ScoreDetails::Skipped => {
 | 
			
		||||
                    details_map
 | 
			
		||||
                        .insert("skipped".to_string(), serde_json::json!({ "order": order }));
 | 
			
		||||
                    order += 1;
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        details_map
 | 
			
		||||
 
 | 
			
		||||
@@ -10,6 +10,7 @@ struct ScoreWithRatioResult {
 | 
			
		||||
    matching_words: MatchingWords,
 | 
			
		||||
    candidates: RoaringBitmap,
 | 
			
		||||
    document_scores: Vec<(u32, ScoreWithRatio)>,
 | 
			
		||||
    degraded: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type ScoreWithRatio = (Vec<ScoreDetails>, f32);
 | 
			
		||||
@@ -49,8 +50,12 @@ fn compare_scores(
 | 
			
		||||
                    order => return order,
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            (Some(ScoreValue::Score(_)), Some(_)) => return Ordering::Greater,
 | 
			
		||||
            (Some(_), Some(ScoreValue::Score(_))) => return Ordering::Less,
 | 
			
		||||
            (Some(ScoreValue::Score(x)), Some(_)) => {
 | 
			
		||||
                return if x == 0. { Ordering::Less } else { Ordering::Greater }
 | 
			
		||||
            }
 | 
			
		||||
            (Some(_), Some(ScoreValue::Score(x))) => {
 | 
			
		||||
                return if x == 0. { Ordering::Greater } else { Ordering::Less }
 | 
			
		||||
            }
 | 
			
		||||
            // if we have this, we're bad
 | 
			
		||||
            (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
 | 
			
		||||
            | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
 | 
			
		||||
@@ -72,6 +77,7 @@ impl ScoreWithRatioResult {
 | 
			
		||||
            matching_words: results.matching_words,
 | 
			
		||||
            candidates: results.candidates,
 | 
			
		||||
            document_scores,
 | 
			
		||||
            degraded: results.degraded,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -106,6 +112,7 @@ impl ScoreWithRatioResult {
 | 
			
		||||
            candidates: left.candidates | right.candidates,
 | 
			
		||||
            documents_ids,
 | 
			
		||||
            document_scores,
 | 
			
		||||
            degraded: left.degraded | right.degraded,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -131,6 +138,7 @@ impl<'a> Search<'a> {
 | 
			
		||||
            index: self.index,
 | 
			
		||||
            distribution_shift: self.distribution_shift,
 | 
			
		||||
            embedder_name: self.embedder_name.clone(),
 | 
			
		||||
            time_budget: self.time_budget.clone(),
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        let vector_query = search.vector.take();
 | 
			
		||||
 
 | 
			
		||||
@@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
 | 
			
		||||
use crate::vector::DistributionShift;
 | 
			
		||||
use crate::{
 | 
			
		||||
    execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
 | 
			
		||||
    SearchContext,
 | 
			
		||||
    SearchContext, TimeBudget,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// Building these factories is not free.
 | 
			
		||||
@@ -43,6 +43,8 @@ pub struct Search<'a> {
 | 
			
		||||
    index: &'a Index,
 | 
			
		||||
    distribution_shift: Option<DistributionShift>,
 | 
			
		||||
    embedder_name: Option<String>,
 | 
			
		||||
 | 
			
		||||
    time_budget: TimeBudget,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'a> Search<'a> {
 | 
			
		||||
@@ -64,6 +66,7 @@ impl<'a> Search<'a> {
 | 
			
		||||
            index,
 | 
			
		||||
            distribution_shift: None,
 | 
			
		||||
            embedder_name: None,
 | 
			
		||||
            time_budget: TimeBudget::max(),
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
@@ -143,6 +146,11 @@ impl<'a> Search<'a> {
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
 | 
			
		||||
        self.time_budget = time_budget;
 | 
			
		||||
        self
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
 | 
			
		||||
        if has_vector_search {
 | 
			
		||||
            let ctx = SearchContext::new(self.index, self.rtxn);
 | 
			
		||||
@@ -169,36 +177,43 @@ impl<'a> Search<'a> {
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        let universe = filtered_universe(&ctx, &self.filter)?;
 | 
			
		||||
        let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } =
 | 
			
		||||
            match self.vector.as_ref() {
 | 
			
		||||
                Some(vector) => execute_vector_search(
 | 
			
		||||
                    &mut ctx,
 | 
			
		||||
                    vector,
 | 
			
		||||
                    self.scoring_strategy,
 | 
			
		||||
                    universe,
 | 
			
		||||
                    &self.sort_criteria,
 | 
			
		||||
                    self.geo_strategy,
 | 
			
		||||
                    self.offset,
 | 
			
		||||
                    self.limit,
 | 
			
		||||
                    self.distribution_shift,
 | 
			
		||||
                    embedder_name,
 | 
			
		||||
                )?,
 | 
			
		||||
                None => execute_search(
 | 
			
		||||
                    &mut ctx,
 | 
			
		||||
                    self.query.as_deref(),
 | 
			
		||||
                    self.terms_matching_strategy,
 | 
			
		||||
                    self.scoring_strategy,
 | 
			
		||||
                    self.exhaustive_number_hits,
 | 
			
		||||
                    universe,
 | 
			
		||||
                    &self.sort_criteria,
 | 
			
		||||
                    self.geo_strategy,
 | 
			
		||||
                    self.offset,
 | 
			
		||||
                    self.limit,
 | 
			
		||||
                    Some(self.words_limit),
 | 
			
		||||
                    &mut DefaultSearchLogger,
 | 
			
		||||
                    &mut DefaultSearchLogger,
 | 
			
		||||
                )?,
 | 
			
		||||
            };
 | 
			
		||||
        let PartialSearchResult {
 | 
			
		||||
            located_query_terms,
 | 
			
		||||
            candidates,
 | 
			
		||||
            documents_ids,
 | 
			
		||||
            document_scores,
 | 
			
		||||
            degraded,
 | 
			
		||||
        } = match self.vector.as_ref() {
 | 
			
		||||
            Some(vector) => execute_vector_search(
 | 
			
		||||
                &mut ctx,
 | 
			
		||||
                vector,
 | 
			
		||||
                self.scoring_strategy,
 | 
			
		||||
                universe,
 | 
			
		||||
                &self.sort_criteria,
 | 
			
		||||
                self.geo_strategy,
 | 
			
		||||
                self.offset,
 | 
			
		||||
                self.limit,
 | 
			
		||||
                self.distribution_shift,
 | 
			
		||||
                embedder_name,
 | 
			
		||||
                self.time_budget.clone(),
 | 
			
		||||
            )?,
 | 
			
		||||
            None => execute_search(
 | 
			
		||||
                &mut ctx,
 | 
			
		||||
                self.query.as_deref(),
 | 
			
		||||
                self.terms_matching_strategy,
 | 
			
		||||
                self.scoring_strategy,
 | 
			
		||||
                self.exhaustive_number_hits,
 | 
			
		||||
                universe,
 | 
			
		||||
                &self.sort_criteria,
 | 
			
		||||
                self.geo_strategy,
 | 
			
		||||
                self.offset,
 | 
			
		||||
                self.limit,
 | 
			
		||||
                Some(self.words_limit),
 | 
			
		||||
                &mut DefaultSearchLogger,
 | 
			
		||||
                &mut DefaultSearchLogger,
 | 
			
		||||
                self.time_budget.clone(),
 | 
			
		||||
            )?,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        // consume context and located_query_terms to build MatchingWords.
 | 
			
		||||
        let matching_words = match located_query_terms {
 | 
			
		||||
@@ -206,7 +221,7 @@ impl<'a> Search<'a> {
 | 
			
		||||
            None => MatchingWords::default(),
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        Ok(SearchResult { matching_words, candidates, document_scores, documents_ids })
 | 
			
		||||
        Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded })
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> {
 | 
			
		||||
            index: _,
 | 
			
		||||
            distribution_shift,
 | 
			
		||||
            embedder_name,
 | 
			
		||||
            time_budget,
 | 
			
		||||
        } = self;
 | 
			
		||||
        f.debug_struct("Search")
 | 
			
		||||
            .field("query", query)
 | 
			
		||||
@@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> {
 | 
			
		||||
            .field("words_limit", words_limit)
 | 
			
		||||
            .field("distribution_shift", distribution_shift)
 | 
			
		||||
            .field("embedder_name", embedder_name)
 | 
			
		||||
            .field("time_budget", time_budget)
 | 
			
		||||
            .finish()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
@@ -254,6 +271,7 @@ pub struct SearchResult {
 | 
			
		||||
    pub candidates: RoaringBitmap,
 | 
			
		||||
    pub documents_ids: Vec<DocumentId>,
 | 
			
		||||
    pub document_scores: Vec<Vec<ScoreDetails>>,
 | 
			
		||||
    pub degraded: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 | 
			
		||||
 
 | 
			
		||||
@@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
 | 
			
		||||
use super::SearchContext;
 | 
			
		||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
 | 
			
		||||
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
 | 
			
		||||
use crate::Result;
 | 
			
		||||
use crate::{Result, TimeBudget};
 | 
			
		||||
 | 
			
		||||
pub struct BucketSortOutput {
 | 
			
		||||
    pub docids: Vec<u32>,
 | 
			
		||||
    pub scores: Vec<Vec<ScoreDetails>>,
 | 
			
		||||
    pub all_candidates: RoaringBitmap,
 | 
			
		||||
 | 
			
		||||
    pub degraded: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// TODO: would probably be good to regroup some of these inside of a struct?
 | 
			
		||||
@@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 | 
			
		||||
    length: usize,
 | 
			
		||||
    scoring_strategy: ScoringStrategy,
 | 
			
		||||
    logger: &mut dyn SearchLogger<Q>,
 | 
			
		||||
    time_budget: TimeBudget,
 | 
			
		||||
) -> Result<BucketSortOutput> {
 | 
			
		||||
    logger.initial_query(query);
 | 
			
		||||
    logger.ranking_rules(&ranking_rules);
 | 
			
		||||
@@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 | 
			
		||||
            docids: vec![],
 | 
			
		||||
            scores: vec![],
 | 
			
		||||
            all_candidates: universe.clone(),
 | 
			
		||||
            degraded: false,
 | 
			
		||||
        });
 | 
			
		||||
    }
 | 
			
		||||
    if ranking_rules.is_empty() {
 | 
			
		||||
@@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 | 
			
		||||
                scores: vec![Default::default(); results.len()],
 | 
			
		||||
                docids: results,
 | 
			
		||||
                all_candidates,
 | 
			
		||||
                degraded: false,
 | 
			
		||||
            });
 | 
			
		||||
        } else {
 | 
			
		||||
            let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
 | 
			
		||||
@@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 | 
			
		||||
                scores: vec![Default::default(); docids.len()],
 | 
			
		||||
                docids,
 | 
			
		||||
                all_candidates: universe.clone(),
 | 
			
		||||
                degraded: false,
 | 
			
		||||
            });
 | 
			
		||||
        };
 | 
			
		||||
    }
 | 
			
		||||
@@ -154,6 +160,28 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    while valid_docids.len() < length {
 | 
			
		||||
        if time_budget.exceeded() {
 | 
			
		||||
            loop {
 | 
			
		||||
                let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
 | 
			
		||||
                ranking_rule_scores.push(ScoreDetails::Skipped);
 | 
			
		||||
                maybe_add_to_results!(bucket);
 | 
			
		||||
                ranking_rule_scores.pop();
 | 
			
		||||
 | 
			
		||||
                if cur_ranking_rule_index == 0 {
 | 
			
		||||
                    break;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                back!();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            return Ok(BucketSortOutput {
 | 
			
		||||
                scores: valid_scores,
 | 
			
		||||
                docids: valid_docids,
 | 
			
		||||
                all_candidates,
 | 
			
		||||
                degraded: true,
 | 
			
		||||
            });
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // The universe for this bucket is zero, so we don't need to sort
 | 
			
		||||
        // anything, just go back to the parent ranking rule.
 | 
			
		||||
        if ranking_rule_universes[cur_ranking_rule_index].is_empty()
 | 
			
		||||
@@ -219,7 +247,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
 | 
			
		||||
        )?;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates })
 | 
			
		||||
    Ok(BucketSortOutput {
 | 
			
		||||
        docids: valid_docids,
 | 
			
		||||
        scores: valid_scores,
 | 
			
		||||
        all_candidates,
 | 
			
		||||
        degraded: false,
 | 
			
		||||
    })
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
 | 
			
		||||
 
 | 
			
		||||
@@ -502,7 +502,7 @@ mod tests {
 | 
			
		||||
 | 
			
		||||
    use super::*;
 | 
			
		||||
    use crate::index::tests::TempIndex;
 | 
			
		||||
    use crate::{execute_search, filtered_universe, SearchContext};
 | 
			
		||||
    use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
 | 
			
		||||
 | 
			
		||||
    impl<'a> MatcherBuilder<'a> {
 | 
			
		||||
        fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self {
 | 
			
		||||
@@ -522,6 +522,7 @@ mod tests {
 | 
			
		||||
                Some(10),
 | 
			
		||||
                &mut crate::DefaultSearchLogger,
 | 
			
		||||
                &mut crate::DefaultSearchLogger,
 | 
			
		||||
                TimeBudget::max(),
 | 
			
		||||
            )
 | 
			
		||||
            .unwrap();
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy};
 | 
			
		||||
use crate::search::new::distinct::apply_distinct_rule;
 | 
			
		||||
use crate::vector::DistributionShift;
 | 
			
		||||
use crate::{
 | 
			
		||||
    AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError,
 | 
			
		||||
    AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
 | 
			
		||||
    UserError,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// A structure used throughout the execution of a search query.
 | 
			
		||||
@@ -518,6 +519,7 @@ pub fn execute_vector_search(
 | 
			
		||||
    length: usize,
 | 
			
		||||
    distribution_shift: Option<DistributionShift>,
 | 
			
		||||
    embedder_name: &str,
 | 
			
		||||
    time_budget: TimeBudget,
 | 
			
		||||
) -> Result<PartialSearchResult> {
 | 
			
		||||
    check_sort_criteria(ctx, sort_criteria.as_ref())?;
 | 
			
		||||
 | 
			
		||||
@@ -537,7 +539,7 @@ pub fn execute_vector_search(
 | 
			
		||||
    let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
 | 
			
		||||
        &mut placeholder_search_logger;
 | 
			
		||||
 | 
			
		||||
    let BucketSortOutput { docids, scores, all_candidates } = bucket_sort(
 | 
			
		||||
    let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
 | 
			
		||||
        ctx,
 | 
			
		||||
        ranking_rules,
 | 
			
		||||
        &PlaceholderQuery,
 | 
			
		||||
@@ -546,6 +548,7 @@ pub fn execute_vector_search(
 | 
			
		||||
        length,
 | 
			
		||||
        scoring_strategy,
 | 
			
		||||
        placeholder_search_logger,
 | 
			
		||||
        time_budget,
 | 
			
		||||
    )?;
 | 
			
		||||
 | 
			
		||||
    Ok(PartialSearchResult {
 | 
			
		||||
@@ -553,6 +556,7 @@ pub fn execute_vector_search(
 | 
			
		||||
        document_scores: scores,
 | 
			
		||||
        documents_ids: docids,
 | 
			
		||||
        located_query_terms: None,
 | 
			
		||||
        degraded,
 | 
			
		||||
    })
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -572,6 +576,7 @@ pub fn execute_search(
 | 
			
		||||
    words_limit: Option<usize>,
 | 
			
		||||
    placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
 | 
			
		||||
    query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
 | 
			
		||||
    time_budget: TimeBudget,
 | 
			
		||||
) -> Result<PartialSearchResult> {
 | 
			
		||||
    check_sort_criteria(ctx, sort_criteria.as_ref())?;
 | 
			
		||||
 | 
			
		||||
@@ -648,6 +653,7 @@ pub fn execute_search(
 | 
			
		||||
            length,
 | 
			
		||||
            scoring_strategy,
 | 
			
		||||
            query_graph_logger,
 | 
			
		||||
            time_budget,
 | 
			
		||||
        )?
 | 
			
		||||
    } else {
 | 
			
		||||
        let ranking_rules =
 | 
			
		||||
@@ -661,10 +667,11 @@ pub fn execute_search(
 | 
			
		||||
            length,
 | 
			
		||||
            scoring_strategy,
 | 
			
		||||
            placeholder_search_logger,
 | 
			
		||||
            time_budget,
 | 
			
		||||
        )?
 | 
			
		||||
    };
 | 
			
		||||
 | 
			
		||||
    let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output;
 | 
			
		||||
    let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output;
 | 
			
		||||
    let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
 | 
			
		||||
 | 
			
		||||
    // The candidates is the universe unless the exhaustive number of hits
 | 
			
		||||
@@ -682,6 +689,7 @@ pub fn execute_search(
 | 
			
		||||
        document_scores: scores,
 | 
			
		||||
        documents_ids: docids,
 | 
			
		||||
        located_query_terms,
 | 
			
		||||
        degraded,
 | 
			
		||||
    })
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -742,4 +750,6 @@ pub struct PartialSearchResult {
 | 
			
		||||
    pub candidates: RoaringBitmap,
 | 
			
		||||
    pub documents_ids: Vec<DocumentId>,
 | 
			
		||||
    pub document_scores: Vec<Vec<ScoreDetails>>,
 | 
			
		||||
 | 
			
		||||
    pub degraded: bool,
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										429
									
								
								milli/src/search/new/tests/cutoff.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										429
									
								
								milli/src/search/new/tests/cutoff.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,429 @@
 | 
			
		||||
//! This module test the search cutoff and ensure a few things:
 | 
			
		||||
//! 1. A basic test works and mark the search as degraded
 | 
			
		||||
//! 2. A test that ensure the filters are affectively applied even with a cutoff of 0
 | 
			
		||||
//! 3. A test that ensure the cutoff works well with the ranking scores
 | 
			
		||||
 | 
			
		||||
use std::time::Duration;
 | 
			
		||||
 | 
			
		||||
use big_s::S;
 | 
			
		||||
use maplit::hashset;
 | 
			
		||||
use meili_snap::snapshot;
 | 
			
		||||
 | 
			
		||||
use crate::index::tests::TempIndex;
 | 
			
		||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
 | 
			
		||||
use crate::{Criterion, Filter, Search, TimeBudget};
 | 
			
		||||
 | 
			
		||||
fn create_index() -> TempIndex {
 | 
			
		||||
    let index = TempIndex::new();
 | 
			
		||||
 | 
			
		||||
    index
 | 
			
		||||
        .update_settings(|s| {
 | 
			
		||||
            s.set_primary_key("id".to_owned());
 | 
			
		||||
            s.set_searchable_fields(vec!["text".to_owned()]);
 | 
			
		||||
            s.set_filterable_fields(hashset! { S("id") });
 | 
			
		||||
            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
 | 
			
		||||
        })
 | 
			
		||||
        .unwrap();
 | 
			
		||||
 | 
			
		||||
    // reverse the ID / insertion order so we see better what was sorted from what got the insertion order ordering
 | 
			
		||||
    index
 | 
			
		||||
        .add_documents(documents!([
 | 
			
		||||
            {
 | 
			
		||||
                "id": 4,
 | 
			
		||||
                "text": "hella puppo kefir",
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
                "id": 3,
 | 
			
		||||
                "text": "hella puppy kefir",
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
                "id": 2,
 | 
			
		||||
                "text": "hello",
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
                "id": 1,
 | 
			
		||||
                "text": "hello puppy",
 | 
			
		||||
            },
 | 
			
		||||
            {
 | 
			
		||||
                "id": 0,
 | 
			
		||||
                "text": "hello puppy kefir",
 | 
			
		||||
            },
 | 
			
		||||
        ]))
 | 
			
		||||
        .unwrap();
 | 
			
		||||
    index
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[test]
 | 
			
		||||
fn basic_degraded_search() {
 | 
			
		||||
    let index = create_index();
 | 
			
		||||
    let rtxn = index.read_txn().unwrap();
 | 
			
		||||
 | 
			
		||||
    let mut search = Search::new(&rtxn, &index);
 | 
			
		||||
    search.query("hello puppy kefir");
 | 
			
		||||
    search.limit(3);
 | 
			
		||||
    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
 | 
			
		||||
 | 
			
		||||
    let result = search.execute().unwrap();
 | 
			
		||||
    assert!(result.degraded);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[test]
fn degraded_search_cannot_skip_filter() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    // Even with no time budget at all the filter must still be applied:
    // skipping it would leak documents the caller is not allowed to see.
    let filter = Filter::from_str("id > 2").unwrap().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("hello puppy kefir");
    search.limit(100);
    search.time_budget(TimeBudget::new(Duration::from_millis(0)));
    search.filter(filter);

    let ret = search.execute().unwrap();
    assert!(ret.degraded);
    // Only the documents matching `id > 2` may appear, degraded or not.
    snapshot!(format!("{:?}\n{:?}", ret.candidates, ret.documents_ids), @r###"
    RoaringBitmap<[0, 1]>
    [0, 1]
    "###);
}
 | 
			
		||||
 | 
			
		||||
#[test]
#[allow(clippy::format_collect)] // the test is already quite big
fn degraded_search_and_score_details() {
    // Walks the cutoff mechanism bucket by bucket: the same query is re-run
    // with an increasing `with_stop_after` budget, and the detailed scores are
    // snapshotted to show which ranking rules had time to run and which ones
    // were reported as `Skipped`.
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("hello puppy kefir");
    search.limit(4);
    search.scoring_strategy(ScoringStrategy::Detailed);
    // Unlimited budget: both ranking rules (words, then typo) run to completion.
    search.time_budget(TimeBudget::max());

    let result = search.execute().unwrap();
    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
    IDs: [4, 1, 0, 3]
    Scores: 1.0000 0.9167 0.8333 0.6667 
    Score Details:
    [
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 0,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 1,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 2,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 2,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 0,
                    max_typo_count: 2,
                },
            ),
        ],
    ]
    "###);

    // Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket.
    search.time_budget(TimeBudget::max().with_stop_after(1));

    let result = search.execute().unwrap();
    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
    IDs: [0, 1, 4, 2]
    Scores: 0.6667 0.6667 0.6667 0.0000 
    Score Details:
    [
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
        [
            Skipped,
        ],
    ]
    "###);

    // Do TWO loop iterations. The first document should be entirely sorted
    search.time_budget(TimeBudget::max().with_stop_after(2));

    let result = search.execute().unwrap();
    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
    IDs: [4, 0, 1, 2]
    Scores: 1.0000 0.6667 0.6667 0.0000 
    Score Details:
    [
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 0,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
        [
            Skipped,
        ],
    ]
    "###);

    // Do THREE loop iterations. The second document should be entirely sorted as well
    search.time_budget(TimeBudget::max().with_stop_after(3));

    let result = search.execute().unwrap();
    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
    IDs: [4, 1, 0, 2]
    Scores: 1.0000 0.9167 0.6667 0.0000 
    Score Details:
    [
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 0,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 1,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
        [
            Skipped,
        ],
    ]
    "###);

    // Do FOUR loop iterations. The third document should be entirely sorted as well
    // The words bucket have still not progressed thus the last document doesn't have any info yet.
    search.time_budget(TimeBudget::max().with_stop_after(4));

    let result = search.execute().unwrap();
    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
    IDs: [4, 1, 0, 2]
    Scores: 1.0000 0.9167 0.8333 0.0000 
    Score Details:
    [
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 0,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 1,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 2,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Skipped,
        ],
    ]
    "###);

    // After SIX loop iteration. The words ranking rule gave us a new bucket.
    // Since we reached the limit we were able to early exit without checking the typo ranking rule.
    search.time_budget(TimeBudget::max().with_stop_after(6))<;

    let result = search.execute().unwrap();
    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
    IDs: [4, 1, 0, 3]
    Scores: 1.0000 0.9167 0.8333 0.3333 
    Score Details:
    [
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 0,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 1,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 3,
                    max_matching_words: 3,
                },
            ),
            Typo(
                Typo {
                    typo_count: 2,
                    max_typo_count: 3,
                },
            ),
        ],
        [
            Words(
                Words {
                    matching_words: 2,
                    max_matching_words: 3,
                },
            ),
            Skipped,
        ],
    ]
    "###);
}
 | 
			
		||||
@@ -1,5 +1,6 @@
 | 
			
		||||
pub mod attribute_fid;
 | 
			
		||||
pub mod attribute_position;
 | 
			
		||||
pub mod cutoff;
 | 
			
		||||
pub mod distinct;
 | 
			
		||||
pub mod exactness;
 | 
			
		||||
pub mod geo_sort;
 | 
			
		||||
 
 | 
			
		||||
@@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> {
 | 
			
		||||
    pagination_max_total_hits: Setting<usize>,
 | 
			
		||||
    proximity_precision: Setting<ProximityPrecision>,
 | 
			
		||||
    embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
 | 
			
		||||
    search_cutoff: Setting<u64>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
@@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
            pagination_max_total_hits: Setting::NotSet,
 | 
			
		||||
            proximity_precision: Setting::NotSet,
 | 
			
		||||
            embedder_settings: Setting::NotSet,
 | 
			
		||||
            search_cutoff: Setting::NotSet,
 | 
			
		||||
            indexer_config,
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
@@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
        self.embedder_settings = Setting::Reset;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Requests that the index-level search cutoff be set to `value`.
    ///
    /// The change is only recorded in this settings builder; it is persisted
    /// when the settings update is executed.
    /// NOTE(review): callers appear to pass a duration in milliseconds — confirm.
    pub fn set_search_cutoff(&mut self, value: u64) {
        self.search_cutoff = Setting::Set(value);
    }
 | 
			
		||||
 | 
			
		||||
    /// Requests that the search cutoff be removed from the index, reverting to
    /// the engine's default behavior. Persisted when the update is executed.
    pub fn reset_search_cutoff(&mut self) {
        self.search_cutoff = Setting::Reset;
    }
 | 
			
		||||
 | 
			
		||||
    #[tracing::instrument(
 | 
			
		||||
        level = "trace"
 | 
			
		||||
        skip(self, progress_callback, should_abort, old_fields_ids_map),
 | 
			
		||||
@@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
        Ok(update)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    fn update_search_cutoff(&mut self) -> Result<bool> {
 | 
			
		||||
        let changed = match self.search_cutoff {
 | 
			
		||||
            Setting::Set(new) => {
 | 
			
		||||
                let old = self.index.search_cutoff(self.wtxn)?;
 | 
			
		||||
                if old == Some(new) {
 | 
			
		||||
                    false
 | 
			
		||||
                } else {
 | 
			
		||||
                    self.index.put_search_cutoff(self.wtxn, new)?;
 | 
			
		||||
                    true
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            Setting::Reset => self.index.delete_search_cutoff(self.wtxn)?,
 | 
			
		||||
            Setting::NotSet => false,
 | 
			
		||||
        };
 | 
			
		||||
 | 
			
		||||
        Ok(changed)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
 | 
			
		||||
    where
 | 
			
		||||
        FP: Fn(UpdateIndexingStep) + Sync,
 | 
			
		||||
@@ -1079,6 +1107,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
 | 
			
		||||
        // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
 | 
			
		||||
        let embedding_configs_updated = self.update_embedding_configs()?;
 | 
			
		||||
 | 
			
		||||
        // never trigger re-indexing
 | 
			
		||||
        self.update_search_cutoff()?;
 | 
			
		||||
 | 
			
		||||
        if stop_words_updated
 | 
			
		||||
            || non_separator_tokens_updated
 | 
			
		||||
            || separator_tokens_updated
 | 
			
		||||
@@ -2035,6 +2066,7 @@ mod tests {
 | 
			
		||||
                    pagination_max_total_hits,
 | 
			
		||||
                    proximity_precision,
 | 
			
		||||
                    embedder_settings,
 | 
			
		||||
                    search_cutoff,
 | 
			
		||||
                } = settings;
 | 
			
		||||
                assert!(matches!(searchable_fields, Setting::NotSet));
 | 
			
		||||
                assert!(matches!(displayed_fields, Setting::NotSet));
 | 
			
		||||
@@ -2058,6 +2090,7 @@ mod tests {
 | 
			
		||||
                assert!(matches!(pagination_max_total_hits, Setting::NotSet));
 | 
			
		||||
                assert!(matches!(proximity_precision, Setting::NotSet));
 | 
			
		||||
                assert!(matches!(embedder_settings, Setting::NotSet));
 | 
			
		||||
                assert!(matches!(search_cutoff, Setting::NotSet));
 | 
			
		||||
            })
 | 
			
		||||
            .unwrap();
 | 
			
		||||
    }
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user