Expose rankingScoreThreshold in API

Add ranking_score_threshold to milli
Merge #4619
2025-11-27 08:12:36 +00:00 · 2024-05-06 15:51:57 +02:00 · 2024-05-06 15:51:57 +02:00 · 2024-05-06 09:37:32 +00:00 · 2024-05-06 11:36:37 +02:00 · 2024-05-03 12:29:31 +01:00
25 changed files with 142 additions and 35 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2169,9 +2169,9 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

 [[package]]
 name = "grenad"
-version = "0.4.5"
+version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6a007932af5475ebb5c63bef8812bb1c36f317983bb4ca663e9d6dd58d6a0f8c"
+checksum = "c297f45167e6d543eb728e12ff284283e4ba2182a25c6cdcec883fda3316c7e7"
 dependencies = [
 "bytemuck",
 "byteorder",
@@ -2181,9 +2181,9 @@ dependencies = [

 [[package]]
 name = "h2"
-version = "0.3.24"
+version = "0.3.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9"
+checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
 dependencies = [
 "bytes",
 "fnv",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,8 @@ members = [
    "benchmarks",
    "fuzzers",
    "tracing-trace",
-    "xtask", "build-info",
+    "xtask",
+    "build-info",
 ]

 [workspace.package]
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -568,7 +568,7 @@ pub mod tests {
        insta::assert_display_snapshot!(p(r"title = 'foo\\\\'"), @r#"{title} = {foo\\}"#);
        insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\'"), @r#"{title} = {foo\\\}"#);
        insta::assert_display_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
-        // but it also works with other sequencies
+        // but it also works with other sequences
        insta::assert_display_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
    }

--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@@ -13,7 +13,7 @@ We can combine the two tasks in a single batch:
 1. import documents X and Y

 Processing this batch is functionally equivalent to processing the two
-tasks individally, but should be much faster since we are only performing
+tasks individually, but should be much faster since we are only performing
 one indexing operation.
 */

--- a/meilisearch-types/src/deserr/mod.rs
+++ b/meilisearch-types/src/deserr/mod.rs
@@ -26,7 +26,7 @@ pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C

 /// A request deserialization error.
 ///
-/// The first generic paramater is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
+/// The first generic parameter is a marker type describing the format of the request: either json (e.g. [`DeserrJson`] or [`DeserrQueryParam`]).
 /// The second generic parameter is the default error code for the deserialization error, in case it is not given.
 pub struct DeserrError<Format, C: Default + ErrorCode> {
    pub msg: String,
@@ -189,3 +189,4 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError);
 merge_with_error_impl_take_error_message!(ParseTaskStatusError);
 merge_with_error_impl_take_error_message!(IndexUidFormatError);
 merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio);
+merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold);
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -240,6 +240,7 @@ InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchAttributesToRetrieve     , InvalidRequest       , BAD_REQUEST ;
+InvalidSearchRankingScoreThreshold    , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchCropLength               , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ;
@@ -488,6 +489,15 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio {
    }
 }

+impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold {
+    /// Writes the fixed message reported when `rankingScoreThreshold` fails validation.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(
+            "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`.",
+        )
+    }
+}
+
 #[macro_export]
 macro_rules! internal_error {
    ($target:ty : $($other:path), *) => {
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -672,6 +672,7 @@ impl SearchAggregator {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
+            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
@@ -1083,6 +1084,7 @@ impl MultiSearchAggregator {
                    matching_strategy: _,
                    attributes_to_search_on: _,
                    hybrid: _,
+                    ranking_score_threshold: _,
                } = query;

                index_uid.as_str()
@@ -1230,6 +1232,7 @@ impl FacetSearchAggregator {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
+            ranking_score_threshold,
        } = query;

        let mut ret = Self::default();
--- a/meilisearch/src/middleware.rs
+++ b/meilisearch/src/middleware.rs
@@ -59,10 +59,12 @@ where
            let request_path = req.path();
            let is_registered_resource = req.resource_map().has_resource(request_path);
            if is_registered_resource {
+                let request_pattern = req.match_pattern();
+                let metric_path = request_pattern.as_ref().map_or(request_path, String::as_str);
                let request_method = req.method().to_string();
                histogram_timer = Some(
                    crate::metrics::MEILISEARCH_HTTP_RESPONSE_TIME_SECONDS
-                        .with_label_values(&[&request_method, request_path])
+                        .with_label_values(&[&request_method, metric_path])
                        .start_timer(),
                );
            }
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@@ -14,9 +14,7 @@ use crate::extractors::authentication::policies::*;
 use crate::extractors::authentication::GuardedData;
 use crate::routes::indexes::search::search_kind;
 use crate::search::{
-    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
-    DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
-    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
+    add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET
 };
 use crate::search_queue::SearchQueue;

@@ -46,6 +44,8 @@ pub struct FacetSearchQuery {
    pub matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
    pub attributes_to_search_on: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
+    pub ranking_score_threshold: Option<RankingScoreThreshold>,
 }

 pub async fn search(
@@ -103,6 +103,7 @@ impl From<FacetSearchQuery> for SearchQuery {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
+            ranking_score_threshold,
        } = value;

        SearchQuery {
@@ -128,6 +129,7 @@ impl From<FacetSearchQuery> for SearchQuery {
            vector,
            attributes_to_search_on,
            hybrid,
+            ranking_score_threshold,
        }
    }
 }
--- a/meilisearch/src/routes/indexes/search.rs
+++ b/meilisearch/src/routes/indexes/search.rs
@@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
 use crate::search::{
-    add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
-    SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
-    DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
+    add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
+    SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
+    DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
+    DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
 };
 use crate::search_queue::SearchQueue;

@@ -82,6 +83,21 @@ pub struct SearchQueryGet {
    pub hybrid_embedder: Option<String>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
    pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>, default)]
+    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
+#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
+pub struct RankingScoreThresholdGet(RankingScoreThreshold);
+
+impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
+    type Error = InvalidSearchRankingScoreThreshold;
+    /// Parses the query-string value as `f64`, then validates it via `RankingScoreThreshold`.
+    fn try_from(s: String) -> Result<Self, Self::Error> {
+        let parsed = s.parse::<f64>().map_err(|_| InvalidSearchRankingScoreThreshold)?;
+        RankingScoreThreshold::try_from(parsed).map(RankingScoreThresholdGet)
+    }
 }

 #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@@ -152,6 +168,7 @@ impl From<SearchQueryGet> for SearchQuery {
            matching_strategy: other.matching_strategy,
            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
            hybrid,
+            ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
        }
    }
 }
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -376,12 +376,6 @@ async fn get_version(
    })
 }

-#[derive(Serialize)]
-struct KeysResponse {
-    private: Option<String>,
-    public: Option<String>,
-}
-
 pub async fn get_health(
    req: HttpRequest,
    index_scheduler: Data<IndexScheduler>,
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@@ -86,6 +86,26 @@ pub struct SearchQuery {
    pub matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
    pub attributes_to_search_on: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
+    pub ranking_score_threshold: Option<RankingScoreThreshold>,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
+#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
+pub struct RankingScoreThreshold(f64);
+
+impl std::convert::TryFrom<f64> for RankingScoreThreshold {
+    type Error = InvalidSearchRankingScoreThreshold;
+    /// Wraps `f` when it lies in `[0.0, 1.0]`; any other value, NaN included, is an error.
+    fn try_from(f: f64) -> Result<Self, Self::Error> {
+        // A positive range test is required here: NaN compares false against both bounds,
+        // so the previous `f > 1.0 || f < 0.0` rejection test let NaN through as valid.
+        if (0.0..=1.0).contains(&f) {
+            Ok(RankingScoreThreshold(f))
+        } else {
+            Err(InvalidSearchRankingScoreThreshold)
+        }
+    }
 }

 #[derive(Debug, Clone, Default, PartialEq, Deserr)]
@@ -251,6 +271,8 @@ pub struct SearchQueryWithIndex {
    pub matching_strategy: MatchingStrategy,
    #[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
    pub attributes_to_search_on: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
+    pub ranking_score_threshold: Option<RankingScoreThreshold>,
 }

 impl SearchQueryWithIndex {
@@ -279,6 +301,7 @@ impl SearchQueryWithIndex {
            matching_strategy,
            attributes_to_search_on,
            hybrid,
+            ranking_score_threshold,
        } = self;
        (
            index_uid,
@@ -305,6 +328,7 @@ impl SearchQueryWithIndex {
                matching_strategy,
                attributes_to_search_on,
                hybrid,
+                ranking_score_threshold,
                // do not use ..Default::default() here,
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
            },
@@ -453,6 +477,7 @@ fn prepare_search<'t>(
 ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
    let mut search = index.search(rtxn);
    search.time_budget(time_budget);
+    search.ranking_score_threshold(query.ranking_score_threshold.map(|rst| rst.0));

    match search_kind {
        SearchKind::KeywordOnly => {
@@ -494,11 +519,16 @@ fn prepare_search<'t>(
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

    search.exhaustive_number_hits(is_finite_pagination);
-    search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
-        ScoringStrategy::Detailed
-    } else {
-        ScoringStrategy::Skip
-    });
+    search.scoring_strategy(
+        if query.show_ranking_score
+            || query.show_ranking_score_details
+            || query.ranking_score_threshold.is_some()
+        {
+            ScoringStrategy::Detailed
+        } else {
+            ScoringStrategy::Skip
+        },
+    );

    // compute the offset on the limit depending on the pagination mode.
    let (offset, limit) = if is_finite_pagination {
--- a/meilitool/src/main.rs
+++ b/meilitool/src/main.rs
@@ -129,7 +129,7 @@ fn clear_task_queue(db_path: PathBuf) -> anyhow::Result<()> {
        }
    }

-    eprintln!("Sucessfully deleted {count} content files from disk!");
+    eprintln!("Successfully deleted {count} content files from disk!");

    Ok(())
 }
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -26,7 +26,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
-grenad = { version = "0.4.5", default-features = false, features = [
+grenad = { version = "0.4.6", default-features = false, features = [
    "rayon",
    "tempfile",
 ] }
--- a/milli/examples/search.rs
+++ b/milli/examples/search.rs
@@ -66,6 +66,7 @@ fn main() -> Result<(), Box<dyn Error>> {
                &mut DefaultSearchLogger,
                logger,
                TimeBudget::max(),
+                None,
            )?;
            if let Some((logger, dir)) = detailed_logger {
                logger.finish(&mut ctx, Path::new(dir))?;
--- a/milli/src/documents/builder.rs
+++ b/milli/src/documents/builder.rs
@@ -203,7 +203,7 @@ fn parse_csv_header(header: &str) -> (&str, AllowedType) {
            "string" => (field_name, AllowedType::String),
            "boolean" => (field_name, AllowedType::Boolean),
            "number" => (field_name, AllowedType::Number),
-            // if the pattern isn't reconized, we keep the whole field.
+            // if the pattern isn't recognized, we keep the whole field.
            _otherwise => (header, AllowedType::String),
        },
        None => (header, AllowedType::String),
--- a/milli/src/search/hybrid.rs
+++ b/milli/src/search/hybrid.rs
@@ -169,6 +169,7 @@ impl<'a> Search<'a> {
            index: self.index,
            semantic: self.semantic.clone(),
            time_budget: self.time_budget.clone(),
+            ranking_score_threshold: self.ranking_score_threshold,
        };

        let semantic = search.semantic.take();
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -49,6 +49,7 @@ pub struct Search<'a> {
    index: &'a Index,
    semantic: Option<SemanticSearch>,
    time_budget: TimeBudget,
+    ranking_score_threshold: Option<f64>,
 }

 impl<'a> Search<'a> {
@@ -69,6 +70,7 @@ impl<'a> Search<'a> {
            index,
            semantic: None,
            time_budget: TimeBudget::max(),
+            ranking_score_threshold: None,
        }
    }

@@ -145,6 +147,14 @@ impl<'a> Search<'a> {
        self
    }

+    /// Sets the minimum global ranking score for the search. The bucket sort drops
+    /// candidates whose score falls below it; `None` (the default) disables the
+    /// filtering. Returns `self` so calls can be chained.
+    pub fn ranking_score_threshold(&mut self, ranking_score_threshold: Option<f64>) -> &mut Self {
+        self.ranking_score_threshold = ranking_score_threshold;
+        self
+    }
+
    pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
        if has_vector_search {
            let ctx = SearchContext::new(self.index, self.rtxn);
@@ -183,6 +193,7 @@ impl<'a> Search<'a> {
                    embedder_name,
                    embedder,
                    self.time_budget.clone(),
+                    self.ranking_score_threshold,
                )?
            }
            _ => execute_search(
@@ -200,6 +211,7 @@ impl<'a> Search<'a> {
                &mut DefaultSearchLogger,
                &mut DefaultSearchLogger,
                self.time_budget.clone(),
+                self.ranking_score_threshold,
            )?,
        };

@@ -238,6 +250,7 @@ impl fmt::Debug for Search<'_> {
            index: _,
            semantic,
            time_budget,
+            ranking_score_threshold,
        } = self;
        f.debug_struct("Search")
            .field("query", query)
@@ -256,6 +269,7 @@ impl fmt::Debug for Search<'_> {
                &semantic.as_ref().map(|semantic| &semantic.embedder_name),
            )
            .field("time_budget", time_budget)
+            .field("ranking_score_threshold", ranking_score_threshold)
            .finish()
    }
 }
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -28,6 +28,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    scoring_strategy: ScoringStrategy,
    logger: &mut dyn SearchLogger<Q>,
    time_budget: TimeBudget,
+    ranking_score_threshold: Option<f64>,
 ) -> Result<BucketSortOutput> {
    logger.initial_query(query);
    logger.ranking_rules(&ranking_rules);
@@ -144,6 +145,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
                ctx,
                from,
                length,
+                ranking_score_threshold,
                logger,
                &mut valid_docids,
                &mut valid_scores,
@@ -164,7 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            loop {
                let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
                ranking_rule_scores.push(ScoreDetails::Skipped);
+
                maybe_add_to_results!(bucket);
+
                ranking_rule_scores.pop();

                if cur_ranking_rule_index == 0 {
@@ -220,6 +224,17 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
        debug_assert!(
            ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
        );
+
+        if let Some(ranking_score_threshold) = ranking_score_threshold {
+            let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
+            if current_score < ranking_score_threshold {
+                all_candidates -=
+                    next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
+                back!();
+                continue;
+            }
+        }
+
        ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;

        if cur_ranking_rule_index == ranking_rules_len - 1
@@ -262,6 +277,7 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
    ctx: &mut SearchContext<'ctx>,
    from: usize,
    length: usize,
+    ranking_score_threshold: Option<f64>,
    logger: &mut dyn SearchLogger<Q>,

    valid_docids: &mut Vec<u32>,
@@ -279,6 +295,15 @@ fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
    ranking_rule_scores: &[ScoreDetails],
    candidates: RoaringBitmap,
 ) -> Result<()> {
+    // remove candidates from the universe without adding them to result if their score is below the threshold
+    if let Some(ranking_score_threshold) = ranking_score_threshold {
+        let score = ScoreDetails::global_score(ranking_rule_scores.iter());
+        if score < ranking_score_threshold {
+            *all_candidates -= candidates | &ranking_rule_universes[cur_ranking_rule_index];
+            return Ok(());
+        }
+    }
+
    // First apply the distinct rule on the candidates, reducing the universes if necessary
    let candidates = if let Some(distinct_fid) = distinct_fid {
        let DistinctOutput { remaining, excluded } =
--- a/milli/src/search/new/geo_sort.rs
+++ b/milli/src/search/new/geo_sort.rs
@@ -42,7 +42,7 @@ fn facet_number_values<'a>(
 }

 /// Define the strategy used by the geo sort.
-/// The paramater represents the cache size, and, in the case of the Dynamic strategy,
+/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
 /// the point where we move from using the iterative strategy to the rtree.
 #[derive(Debug, Clone, Copy)]
 pub enum Strategy {
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@@ -134,7 +134,7 @@ impl<'t> Matcher<'t, '_> {
            for (token_position, word_position, word) in words_positions {
                partial = match partial.match_token(word) {
                    // token matches the partial match, but the match is not full,
-                    // we temporarly save the current token then we try to match the next one.
+                    // we temporarily save the current token then we try to match the next one.
                    Some(MatchType::Partial(partial)) => {
                        potential_matches.push((token_position, word_position, partial.char_len()));
                        partial
@@ -523,6 +523,7 @@ mod tests {
                &mut crate::DefaultSearchLogger,
                &mut crate::DefaultSearchLogger,
                TimeBudget::max(),
+                None,
            )
            .unwrap();

@@ -722,7 +723,7 @@ mod tests {
            @"…void void void void void split the world void void"
        );

-        // Text containing matches with diferent density.
+        // Text containing matches with different density.
        let text = "split void the void void world void void void void void void void void void void split the world void void";
        let mut matcher = builder.build(text);
        // crop should return 10 last words with a marker at the start.
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -551,6 +551,7 @@ pub fn execute_vector_search(
    embedder_name: &str,
    embedder: &Embedder,
    time_budget: TimeBudget,
+    ranking_score_threshold: Option<f64>,
 ) -> Result<PartialSearchResult> {
    check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -580,6 +581,7 @@ pub fn execute_vector_search(
        scoring_strategy,
        placeholder_search_logger,
        time_budget,
+        ranking_score_threshold,
    )?;

    Ok(PartialSearchResult {
@@ -609,6 +611,7 @@ pub fn execute_search(
    placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
    query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
    time_budget: TimeBudget,
+    ranking_score_threshold: Option<f64>,
 ) -> Result<PartialSearchResult> {
    check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -697,6 +700,7 @@ pub fn execute_search(
            scoring_strategy,
            query_graph_logger,
            time_budget,
+            ranking_score_threshold,
        )?
    } else {
        let ranking_rules =
@@ -711,6 +715,7 @@ pub fn execute_search(
            scoring_strategy,
            placeholder_search_logger,
            time_budget,
+            ranking_score_threshold,
        )?
    };

--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -119,7 +119,7 @@ pub fn located_query_terms_from_tokens(
                            if let Some(located_query_term) = phrase.build(ctx) {
                                // as we are evaluating a negative operator we put the phrase
                                // in the negative one *but* we don't reset the negative operator
-                                // as we are immediatly starting a new negative phrase.
+                                // as we are immediately starting a new negative phrase.
                                if negative_phrase {
                                    negative_phrases.push(located_query_term);
                                } else {
--- a/milli/src/update/facet/incremental.rs
+++ b/milli/src/update/facet/incremental.rs
@@ -499,7 +499,7 @@ impl FacetsUpdateIncrementalInner {
                    ModificationResult::Expand | ModificationResult::Reduce { .. }
                )
            {
-                // if any modification occured, insert it in the database.
+                // if any modification occurred, insert it in the database.
                self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
                Ok(insertion_key_modification)
            } else {
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -36,7 +36,7 @@ pub struct ExtractedFacetValues {

 /// Extracts the facet values of each faceted field of each document.
 ///
-/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
+/// Returns the generated grenad reader containing the docid the fid and the original value as key
 /// and the normalized value as value extracted from the given chunk of documents.
 /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")]
Author	SHA1	Message	Date
Louis Dureuil	413f86fa3d	Expose rankingScoreThreshold in API	2024-05-06 15:51:57 +02:00
Louis Dureuil	faf7696a0c	Add ranking_score_threshold to milli	2024-05-06 15:51:57 +02:00
meili-bors[bot]	ecb5c506b3	Merge #4619 4619: Use http path pattern instead of full path in metrics r=irevoire a=gh2k # Pull Request ## Related issue Fixes #3983 ## What does this PR do? - This records only the HTTP pattern in metrics instead of the full path An alternative solution was proposed in #4145, but this doesn't really fix the root cause of the issue. The problem I'm experiencing at my end is that by using the full path, the number of labels is far too high to be useful. It is normal practice to use the path with variable placeholders, instead of the fully-expanded path. The example given in the ticket was endpoints under `/tasks`, but this can also be a very significant problem under `/indexes/{index-uid}/documents`. e.g.: <img width="1510" alt="Screenshot 2024-05-03 at 12 14 36" src="https://github.com/meilisearch/meilisearch/assets/6530014/1df2ec19-5f69-4164-90d2-f65c59f9b544"> This patch replaces the fully-expanded path with the matched pattern. The linked PR also mentions paths under other routes, e.g. `/static`, but this feels like a separate concern and these can be stripped out at the Prometheus end by filters if they are unwanted. The most important thing is to make the paths usable so that we can still get stats on e.g. the number of document deletes we see. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Simon Detheridge <s@sd.ai> Co-authored-by: Tamo <tamo@meilisearch.com>	2024-05-06 09:37:32 +00:00
Tamo	3698aef66b	fix warning	2024-05-06 11:36:37 +02:00
Simon Detheridge	7f5ab3cef5	Use http path pattern instead of full path in metrics	2024-05-03 12:29:31 +01:00
meili-bors[bot]	248e22005a	Merge #4582 4582: Fix some typos in comments r=curquiza a=writegr # Pull Request ## Related issue No ## What does this PR do? fix some typos in comments ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [ ] Have you read the contributing guidelines? - [ ] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: writegr <wellweek@outlook.com>	2024-04-18 07:07:33 +00:00
writegr	ab43a8a949	chore: fix some typos in comments Signed-off-by: writegr <wellweek@outlook.com>	2024-04-18 14:12:52 +08:00
meili-bors[bot]	4089dd04a5	Merge #4568 4568: Fix some typos in comments r=curquiza a=yudrywet # Pull Request ## Related issue No ## What does this PR do? fix some typos in comments ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [ ] Have you read the contributing guidelines? - [ ] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: yudrywet <yudeyao@yeah.net>	2024-04-15 08:12:43 +00:00
yudrywet	cf864a1c2e	chore: fix some typos in comments Signed-off-by: yudrywet <yudeyao@yeah.net>	2024-04-14 20:11:34 +08:00
meili-bors[bot]	0661c86f16	Merge #4566 4566: Bring back changes from v1.7.6 to main r=irevoire a=dureuill Co-authored-by: Louis Dureuil <louis@meilisearch.com> Co-authored-by: dureuill <dureuill@users.noreply.github.com>	2024-04-11 19:32:29 +00:00
dureuill	a6c02f7684	Update version for the next release (v1.7.6) in Cargo.toml	2024-04-11 21:08:57 +02:00
Louis Dureuil	89e72fab32	Update grenad to fix rare DB corruption	2024-04-11 21:06:59 +02:00
meili-bors[bot]	171b41be24	Merge #4560 4560: Bring back change from v1.7.5 to main r=curquiza a=irevoire Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: irevoire <irevoire@users.noreply.github.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>	2024-04-09 16:58:30 +00:00
Tamo	c26d356a35	Merge branch 'main' into release-v1.7.5-tmp	2024-04-09 14:46:15 +02:00
meili-bors[bot]	217fbc777f	Merge #4554 4554: Update version for the next release (v1.7.5) in Cargo.toml r=curquiza a=meili-bot ⚠️ This PR is automatically generated. Check the new version is the expected one and Cargo.lock has been updated before merging. Co-authored-by: irevoire <irevoire@users.noreply.github.com>	2024-04-04 18:03:04 +00:00
meili-bors[bot]	c2c73c1f25	Merge #4553 4553: update h2 r=curquiza a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/4551 Co-authored-by: Tamo <tamo@meilisearch.com>	2024-04-04 17:23:00 +00:00
irevoire	7a49a056fa	Update version for the next release (v1.7.5) in Cargo.toml	2024-04-04 16:33:45 +00:00
Tamo	fd4be26718	update h2	2024-04-04 18:27:16 +02:00