mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 05:26:27 +00:00 
			
		
		
		
	Merge #4466
4466: Implements the search cutoff r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4488

## What does this PR do?
- Adds a cutoff to the bucket sort after 150ms has been spent
- Adds a new setting to customize the default value of 150ms
- When the time is exceeded, we exit early with what we had time to sort
- If the cutoff has been reached, the search details are updated with a new `Skip` ranking detail for the ranking rules that were skipped
- Adds analytics to measure the total number of degraded search requests
- Adds the number of degraded search requests to the Prometheus metrics and Grafana dashboard
- The cutoff **must not** skip the filters; otherwise, we would leak documents to people who don’t have the right to see them

Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -238,6 +238,70 @@ | |||||||
|       "title": "Total Searches (1h)", |       "title": "Total Searches (1h)", | ||||||
|       "type": "gauge" |       "type": "gauge" | ||||||
|     }, |     }, | ||||||
|  |     { | ||||||
|  |       "datasource": { | ||||||
|  |         "type": "prometheus" | ||||||
|  |       }, | ||||||
|  |       "fieldConfig": { | ||||||
|  |         "defaults": { | ||||||
|  |           "color": { | ||||||
|  |             "mode": "thresholds" | ||||||
|  |           }, | ||||||
|  |           "mappings": [], | ||||||
|  |           "thresholds": { | ||||||
|  |             "mode": "absolute", | ||||||
|  |             "steps": [ | ||||||
|  |               { | ||||||
|  |                 "color": "green", | ||||||
|  |                 "value": null | ||||||
|  |               }, | ||||||
|  |               { | ||||||
|  |                 "color": "red", | ||||||
|  |                 "value": 80 | ||||||
|  |               } | ||||||
|  |             ] | ||||||
|  |           } | ||||||
|  |         }, | ||||||
|  |         "overrides": [] | ||||||
|  |       }, | ||||||
|  |       "gridPos": { | ||||||
|  |         "h": 6, | ||||||
|  |         "w": 4, | ||||||
|  |         "x": 8, | ||||||
|  |         "y": 1 | ||||||
|  |       }, | ||||||
|  |       "id": 26, | ||||||
|  |       "options": { | ||||||
|  |         "orientation": "auto", | ||||||
|  |         "reduceOptions": { | ||||||
|  |           "calcs": [ | ||||||
|  |             "lastNotNull" | ||||||
|  |           ], | ||||||
|  |           "fields": "", | ||||||
|  |           "values": false | ||||||
|  |         }, | ||||||
|  |         "showThresholdLabels": false, | ||||||
|  |         "showThresholdMarkers": true, | ||||||
|  |         "text": {} | ||||||
|  |       }, | ||||||
|  |       "pluginVersion": "9.5.2", | ||||||
|  |       "targets": [ | ||||||
|  |         { | ||||||
|  |           "datasource": { | ||||||
|  |             "type": "prometheus" | ||||||
|  |           }, | ||||||
|  |           "editorMode": "builder", | ||||||
|  |           "exemplar": true, | ||||||
|  |           "expr": "round(increase(meilisearch_degraded_search_requests{job=\"$job\"}[1h]))", | ||||||
|  |           "interval": "", | ||||||
|  |           "legendFormat": "", | ||||||
|  |           "range": true, | ||||||
|  |           "refId": "A" | ||||||
|  |         } | ||||||
|  |       ], | ||||||
|  |       "title": "Total Degraded Searches (1h)", | ||||||
|  |       "type": "gauge" | ||||||
|  |     }, | ||||||
|     { |     { | ||||||
|       "datasource": { |       "datasource": { | ||||||
|         "type": "prometheus" |         "type": "prometheus" | ||||||
|   | |||||||
| @@ -277,6 +277,7 @@ pub(crate) mod test { | |||||||
|             }), |             }), | ||||||
|             pagination: Setting::NotSet, |             pagination: Setting::NotSet, | ||||||
|             embedders: Setting::NotSet, |             embedders: Setting::NotSet, | ||||||
|  |             search_cutoff_ms: Setting::NotSet, | ||||||
|             _kind: std::marker::PhantomData, |             _kind: std::marker::PhantomData, | ||||||
|         }; |         }; | ||||||
|         settings.check() |         settings.check() | ||||||
|   | |||||||
| @@ -379,6 +379,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> { | |||||||
|                 v5::Setting::NotSet => v6::Setting::NotSet, |                 v5::Setting::NotSet => v6::Setting::NotSet, | ||||||
|             }, |             }, | ||||||
|             embedders: v6::Setting::NotSet, |             embedders: v6::Setting::NotSet, | ||||||
|  |             search_cutoff_ms: v6::Setting::NotSet, | ||||||
|             _kind: std::marker::PhantomData, |             _kind: std::marker::PhantomData, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -259,6 +259,7 @@ InvalidSettingsProximityPrecision     , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidSettingsFaceting               , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsFaceting               , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsFilterableAttributes   , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsFilterableAttributes   , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsPagination             , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsPagination             , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidSettingsSearchCutoffMs           , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsEmbedders              , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsEmbedders              , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsRankingRules           , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsRankingRules           , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidSettingsSearchableAttributes   , InvalidRequest       , BAD_REQUEST ; | InvalidSettingsSearchableAttributes   , InvalidRequest       , BAD_REQUEST ; | ||||||
|   | |||||||
| @@ -202,6 +202,9 @@ pub struct Settings<T> { | |||||||
|     #[serde(default, skip_serializing_if = "Setting::is_not_set")] |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|     #[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)] |     #[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)] | ||||||
|     pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>, |     pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>, | ||||||
|  |     #[serde(default, skip_serializing_if = "Setting::is_not_set")] | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)] | ||||||
|  |     pub search_cutoff_ms: Setting<u64>, | ||||||
|  |  | ||||||
|     #[serde(skip)] |     #[serde(skip)] | ||||||
|     #[deserr(skip)] |     #[deserr(skip)] | ||||||
| @@ -227,6 +230,7 @@ impl Settings<Checked> { | |||||||
|             faceting: Setting::Reset, |             faceting: Setting::Reset, | ||||||
|             pagination: Setting::Reset, |             pagination: Setting::Reset, | ||||||
|             embedders: Setting::Reset, |             embedders: Setting::Reset, | ||||||
|  |             search_cutoff_ms: Setting::Reset, | ||||||
|             _kind: PhantomData, |             _kind: PhantomData, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -249,6 +253,7 @@ impl Settings<Checked> { | |||||||
|             faceting, |             faceting, | ||||||
|             pagination, |             pagination, | ||||||
|             embedders, |             embedders, | ||||||
|  |             search_cutoff_ms, | ||||||
|             .. |             .. | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
| @@ -269,6 +274,7 @@ impl Settings<Checked> { | |||||||
|             faceting, |             faceting, | ||||||
|             pagination, |             pagination, | ||||||
|             embedders, |             embedders, | ||||||
|  |             search_cutoff_ms, | ||||||
|             _kind: PhantomData, |             _kind: PhantomData, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -315,6 +321,7 @@ impl Settings<Unchecked> { | |||||||
|             faceting: self.faceting, |             faceting: self.faceting, | ||||||
|             pagination: self.pagination, |             pagination: self.pagination, | ||||||
|             embedders: self.embedders, |             embedders: self.embedders, | ||||||
|  |             search_cutoff_ms: self.search_cutoff_ms, | ||||||
|             _kind: PhantomData, |             _kind: PhantomData, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -347,19 +354,40 @@ pub fn apply_settings_to_builder( | |||||||
|     settings: &Settings<Checked>, |     settings: &Settings<Checked>, | ||||||
|     builder: &mut milli::update::Settings, |     builder: &mut milli::update::Settings, | ||||||
| ) { | ) { | ||||||
|     match settings.searchable_attributes { |     let Settings { | ||||||
|  |         displayed_attributes, | ||||||
|  |         searchable_attributes, | ||||||
|  |         filterable_attributes, | ||||||
|  |         sortable_attributes, | ||||||
|  |         ranking_rules, | ||||||
|  |         stop_words, | ||||||
|  |         non_separator_tokens, | ||||||
|  |         separator_tokens, | ||||||
|  |         dictionary, | ||||||
|  |         synonyms, | ||||||
|  |         distinct_attribute, | ||||||
|  |         proximity_precision, | ||||||
|  |         typo_tolerance, | ||||||
|  |         faceting, | ||||||
|  |         pagination, | ||||||
|  |         embedders, | ||||||
|  |         search_cutoff_ms, | ||||||
|  |         _kind, | ||||||
|  |     } = settings; | ||||||
|  |  | ||||||
|  |     match searchable_attributes { | ||||||
|         Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), |         Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), | ||||||
|         Setting::Reset => builder.reset_searchable_fields(), |         Setting::Reset => builder.reset_searchable_fields(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.displayed_attributes { |     match displayed_attributes { | ||||||
|         Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), |         Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), | ||||||
|         Setting::Reset => builder.reset_displayed_fields(), |         Setting::Reset => builder.reset_displayed_fields(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.filterable_attributes { |     match filterable_attributes { | ||||||
|         Setting::Set(ref facets) => { |         Setting::Set(ref facets) => { | ||||||
|             builder.set_filterable_fields(facets.clone().into_iter().collect()) |             builder.set_filterable_fields(facets.clone().into_iter().collect()) | ||||||
|         } |         } | ||||||
| @@ -367,13 +395,13 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.sortable_attributes { |     match sortable_attributes { | ||||||
|         Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()), |         Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()), | ||||||
|         Setting::Reset => builder.reset_sortable_fields(), |         Setting::Reset => builder.reset_sortable_fields(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.ranking_rules { |     match ranking_rules { | ||||||
|         Setting::Set(ref criteria) => { |         Setting::Set(ref criteria) => { | ||||||
|             builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect()) |             builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect()) | ||||||
|         } |         } | ||||||
| @@ -381,13 +409,13 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.stop_words { |     match stop_words { | ||||||
|         Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), |         Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), | ||||||
|         Setting::Reset => builder.reset_stop_words(), |         Setting::Reset => builder.reset_stop_words(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.non_separator_tokens { |     match non_separator_tokens { | ||||||
|         Setting::Set(ref non_separator_tokens) => { |         Setting::Set(ref non_separator_tokens) => { | ||||||
|             builder.set_non_separator_tokens(non_separator_tokens.clone()) |             builder.set_non_separator_tokens(non_separator_tokens.clone()) | ||||||
|         } |         } | ||||||
| @@ -395,7 +423,7 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.separator_tokens { |     match separator_tokens { | ||||||
|         Setting::Set(ref separator_tokens) => { |         Setting::Set(ref separator_tokens) => { | ||||||
|             builder.set_separator_tokens(separator_tokens.clone()) |             builder.set_separator_tokens(separator_tokens.clone()) | ||||||
|         } |         } | ||||||
| @@ -403,31 +431,31 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.dictionary { |     match dictionary { | ||||||
|         Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()), |         Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()), | ||||||
|         Setting::Reset => builder.reset_dictionary(), |         Setting::Reset => builder.reset_dictionary(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.synonyms { |     match synonyms { | ||||||
|         Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()), |         Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()), | ||||||
|         Setting::Reset => builder.reset_synonyms(), |         Setting::Reset => builder.reset_synonyms(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.distinct_attribute { |     match distinct_attribute { | ||||||
|         Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), |         Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), | ||||||
|         Setting::Reset => builder.reset_distinct_field(), |         Setting::Reset => builder.reset_distinct_field(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.proximity_precision { |     match proximity_precision { | ||||||
|         Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()), |         Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()), | ||||||
|         Setting::Reset => builder.reset_proximity_precision(), |         Setting::Reset => builder.reset_proximity_precision(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.typo_tolerance { |     match typo_tolerance { | ||||||
|         Setting::Set(ref value) => { |         Setting::Set(ref value) => { | ||||||
|             match value.enabled { |             match value.enabled { | ||||||
|                 Setting::Set(val) => builder.set_autorize_typos(val), |                 Setting::Set(val) => builder.set_autorize_typos(val), | ||||||
| @@ -482,7 +510,7 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match &settings.faceting { |     match faceting { | ||||||
|         Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => { |         Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => { | ||||||
|             match max_values_per_facet { |             match max_values_per_facet { | ||||||
|                 Setting::Set(val) => builder.set_max_values_per_facet(*val), |                 Setting::Set(val) => builder.set_max_values_per_facet(*val), | ||||||
| @@ -504,7 +532,7 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.pagination { |     match pagination { | ||||||
|         Setting::Set(ref value) => match value.max_total_hits { |         Setting::Set(ref value) => match value.max_total_hits { | ||||||
|             Setting::Set(val) => builder.set_pagination_max_total_hits(val), |             Setting::Set(val) => builder.set_pagination_max_total_hits(val), | ||||||
|             Setting::Reset => builder.reset_pagination_max_total_hits(), |             Setting::Reset => builder.reset_pagination_max_total_hits(), | ||||||
| @@ -514,11 +542,17 @@ pub fn apply_settings_to_builder( | |||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     match settings.embedders.clone() { |     match embedders { | ||||||
|         Setting::Set(value) => builder.set_embedder_settings(value), |         Setting::Set(value) => builder.set_embedder_settings(value.clone()), | ||||||
|         Setting::Reset => builder.reset_embedder_settings(), |         Setting::Reset => builder.reset_embedder_settings(), | ||||||
|         Setting::NotSet => (), |         Setting::NotSet => (), | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     match search_cutoff_ms { | ||||||
|  |         Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff), | ||||||
|  |         Setting::Reset => builder.reset_search_cutoff(), | ||||||
|  |         Setting::NotSet => (), | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn settings( | pub fn settings( | ||||||
| @@ -607,6 +641,8 @@ pub fn settings( | |||||||
|         .collect(); |         .collect(); | ||||||
|     let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; |     let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; | ||||||
|  |  | ||||||
|  |     let search_cutoff_ms = index.search_cutoff(rtxn)?; | ||||||
|  |  | ||||||
|     Ok(Settings { |     Ok(Settings { | ||||||
|         displayed_attributes: match displayed_attributes { |         displayed_attributes: match displayed_attributes { | ||||||
|             Some(attrs) => Setting::Set(attrs), |             Some(attrs) => Setting::Set(attrs), | ||||||
| @@ -633,6 +669,10 @@ pub fn settings( | |||||||
|         faceting: Setting::Set(faceting), |         faceting: Setting::Set(faceting), | ||||||
|         pagination: Setting::Set(pagination), |         pagination: Setting::Set(pagination), | ||||||
|         embedders, |         embedders, | ||||||
|  |         search_cutoff_ms: match search_cutoff_ms { | ||||||
|  |             Some(cutoff) => Setting::Set(cutoff), | ||||||
|  |             None => Setting::Reset, | ||||||
|  |         }, | ||||||
|         _kind: PhantomData, |         _kind: PhantomData, | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
| @@ -783,6 +823,7 @@ pub(crate) mod test { | |||||||
|             faceting: Setting::NotSet, |             faceting: Setting::NotSet, | ||||||
|             pagination: Setting::NotSet, |             pagination: Setting::NotSet, | ||||||
|             embedders: Setting::NotSet, |             embedders: Setting::NotSet, | ||||||
|  |             search_cutoff_ms: Setting::NotSet, | ||||||
|             _kind: PhantomData::<Unchecked>, |             _kind: PhantomData::<Unchecked>, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -809,6 +850,7 @@ pub(crate) mod test { | |||||||
|             faceting: Setting::NotSet, |             faceting: Setting::NotSet, | ||||||
|             pagination: Setting::NotSet, |             pagination: Setting::NotSet, | ||||||
|             embedders: Setting::NotSet, |             embedders: Setting::NotSet, | ||||||
|  |             search_cutoff_ms: Setting::NotSet, | ||||||
|             _kind: PhantomData::<Unchecked>, |             _kind: PhantomData::<Unchecked>, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|   | |||||||
| @@ -579,6 +579,7 @@ pub struct SearchAggregator { | |||||||
|     // requests |     // requests | ||||||
|     total_received: usize, |     total_received: usize, | ||||||
|     total_succeeded: usize, |     total_succeeded: usize, | ||||||
|  |     total_degraded: usize, | ||||||
|     time_spent: BinaryHeap<usize>, |     time_spent: BinaryHeap<usize>, | ||||||
|  |  | ||||||
|     // sort |     // sort | ||||||
| @@ -758,9 +759,13 @@ impl SearchAggregator { | |||||||
|             hits_info: _, |             hits_info: _, | ||||||
|             facet_distribution: _, |             facet_distribution: _, | ||||||
|             facet_stats: _, |             facet_stats: _, | ||||||
|  |             degraded, | ||||||
|         } = result; |         } = result; | ||||||
|  |  | ||||||
|         self.total_succeeded = self.total_succeeded.saturating_add(1); |         self.total_succeeded = self.total_succeeded.saturating_add(1); | ||||||
|  |         if *degraded { | ||||||
|  |             self.total_degraded = self.total_degraded.saturating_add(1); | ||||||
|  |         } | ||||||
|         self.time_spent.push(*processing_time_ms as usize); |         self.time_spent.push(*processing_time_ms as usize); | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -802,6 +807,7 @@ impl SearchAggregator { | |||||||
|             semantic_ratio, |             semantic_ratio, | ||||||
|             embedder, |             embedder, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             total_degraded, | ||||||
|         } = other; |         } = other; | ||||||
|  |  | ||||||
|         if self.timestamp.is_none() { |         if self.timestamp.is_none() { | ||||||
| @@ -816,6 +822,7 @@ impl SearchAggregator { | |||||||
|         // request |         // request | ||||||
|         self.total_received = self.total_received.saturating_add(total_received); |         self.total_received = self.total_received.saturating_add(total_received); | ||||||
|         self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); |         self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); | ||||||
|  |         self.total_degraded = self.total_degraded.saturating_add(total_degraded); | ||||||
|         self.time_spent.append(time_spent); |         self.time_spent.append(time_spent); | ||||||
|  |  | ||||||
|         // sort |         // sort | ||||||
| @@ -921,6 +928,7 @@ impl SearchAggregator { | |||||||
|             semantic_ratio, |             semantic_ratio, | ||||||
|             embedder, |             embedder, | ||||||
|             hybrid, |             hybrid, | ||||||
|  |             total_degraded, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         if total_received == 0 { |         if total_received == 0 { | ||||||
| @@ -940,6 +948,7 @@ impl SearchAggregator { | |||||||
|                     "total_succeeded": total_succeeded, |                     "total_succeeded": total_succeeded, | ||||||
|                     "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics |                     "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics | ||||||
|                     "total_received": total_received, |                     "total_received": total_received, | ||||||
|  |                     "total_degraded": total_degraded, | ||||||
|                 }, |                 }, | ||||||
|                 "sort": { |                 "sort": { | ||||||
|                     "with_geoPoint": sort_with_geo_point, |                     "with_geoPoint": sort_with_geo_point, | ||||||
|   | |||||||
| @@ -22,6 +22,11 @@ lazy_static! { | |||||||
|         &["method", "path"] |         &["method", "path"] | ||||||
|     ) |     ) | ||||||
|     .expect("Can't create a metric"); |     .expect("Can't create a metric"); | ||||||
|  |     pub static ref MEILISEARCH_DEGRADED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!( | ||||||
|  |         "meilisearch_degraded_search_requests", | ||||||
|  |         "Meilisearch number of degraded search requests" | ||||||
|  |     )) | ||||||
|  |     .expect("Can't create a metric"); | ||||||
|     pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = |     pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = | ||||||
|         register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) |         register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) | ||||||
|             .expect("Can't create a metric"); |             .expect("Can't create a metric"); | ||||||
|   | |||||||
| @@ -17,6 +17,7 @@ use crate::analytics::{Analytics, SearchAggregator}; | |||||||
| use crate::extractors::authentication::policies::*; | use crate::extractors::authentication::policies::*; | ||||||
| use crate::extractors::authentication::GuardedData; | use crate::extractors::authentication::GuardedData; | ||||||
| use crate::extractors::sequential_extractor::SeqHandler; | use crate::extractors::sequential_extractor::SeqHandler; | ||||||
|  | use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; | ||||||
| use crate::search::{ | use crate::search::{ | ||||||
|     add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio, |     add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio, | ||||||
|     DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, |     DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, | ||||||
| @@ -247,6 +248,9 @@ pub async fn search_with_post( | |||||||
|             .await?; |             .await?; | ||||||
|     if let Ok(ref search_result) = search_result { |     if let Ok(ref search_result) = search_result { | ||||||
|         aggregate.succeed(search_result); |         aggregate.succeed(search_result); | ||||||
|  |         if search_result.degraded { | ||||||
|  |             MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     analytics.post_search(aggregate); |     analytics.post_search(aggregate); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -624,6 +624,25 @@ fn embedder_analytics( | |||||||
|     ) |     ) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | make_setting_route!( | ||||||
|  |     "/search-cutoff-ms", | ||||||
|  |     put, | ||||||
|  |     u64, | ||||||
|  |     meilisearch_types::deserr::DeserrJsonError< | ||||||
|  |         meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, | ||||||
|  |     >, | ||||||
|  |     search_cutoff_ms, | ||||||
|  |     "searchCutoffMs", | ||||||
|  |     analytics, | ||||||
|  |     |setting: &Option<u64>, req: &HttpRequest| { | ||||||
|  |         analytics.publish( | ||||||
|  |             "Search Cutoff Updated".to_string(), | ||||||
|  |             serde_json::json!({"search_cutoff_ms": setting }), | ||||||
|  |             Some(req), | ||||||
|  |         ); | ||||||
|  |     } | ||||||
|  | ); | ||||||
|  |  | ||||||
| macro_rules! generate_configure { | macro_rules! generate_configure { | ||||||
|     ($($mod:ident),*) => { |     ($($mod:ident),*) => { | ||||||
|         pub fn configure(cfg: &mut web::ServiceConfig) { |         pub fn configure(cfg: &mut web::ServiceConfig) { | ||||||
| @@ -654,7 +673,8 @@ generate_configure!( | |||||||
|     typo_tolerance, |     typo_tolerance, | ||||||
|     pagination, |     pagination, | ||||||
|     faceting, |     faceting, | ||||||
|     embedders |     embedders, | ||||||
|  |     search_cutoff_ms | ||||||
| ); | ); | ||||||
|  |  | ||||||
| pub async fn update_all( | pub async fn update_all( | ||||||
| @@ -765,7 +785,8 @@ pub async fn update_all( | |||||||
|             "synonyms": { |             "synonyms": { | ||||||
|                 "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), |                 "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), | ||||||
|             }, |             }, | ||||||
|             "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()) |             "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), | ||||||
|  |             "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(), | ||||||
|         }), |         }), | ||||||
|         Some(&req), |         Some(&req), | ||||||
|     ); |     ); | ||||||
|   | |||||||
| @@ -1,7 +1,7 @@ | |||||||
| use std::cmp::min; | use std::cmp::min; | ||||||
| use std::collections::{BTreeMap, BTreeSet, HashSet}; | use std::collections::{BTreeMap, BTreeSet, HashSet}; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
| use std::time::Instant; | use std::time::{Duration, Instant}; | ||||||
|  |  | ||||||
| use deserr::Deserr; | use deserr::Deserr; | ||||||
| use either::Either; | use either::Either; | ||||||
| @@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn; | |||||||
| use meilisearch_types::index_uid::IndexUid; | use meilisearch_types::index_uid::IndexUid; | ||||||
| use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy}; | use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy}; | ||||||
| use meilisearch_types::milli::vector::DistributionShift; | use meilisearch_types::milli::vector::DistributionShift; | ||||||
| use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues}; | use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; | ||||||
| use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; | ||||||
| use meilisearch_types::{milli, Document}; | use meilisearch_types::{milli, Document}; | ||||||
| use milli::tokenizer::TokenizerBuilder; | use milli::tokenizer::TokenizerBuilder; | ||||||
| @@ -323,6 +323,10 @@ pub struct SearchResult { | |||||||
|     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>, |     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>, | ||||||
|     #[serde(skip_serializing_if = "Option::is_none")] |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|     pub facet_stats: Option<BTreeMap<String, FacetStats>>, |     pub facet_stats: Option<BTreeMap<String, FacetStats>>, | ||||||
|  |  | ||||||
|  |     // This information is only used for analytics purposes | ||||||
|  |     #[serde(skip)] | ||||||
|  |     pub degraded: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Serialize, Debug, Clone, PartialEq)] | #[derive(Serialize, Debug, Clone, PartialEq)] | ||||||
| @@ -382,8 +386,10 @@ fn prepare_search<'t>( | |||||||
|     query: &'t SearchQuery, |     query: &'t SearchQuery, | ||||||
|     features: RoFeatures, |     features: RoFeatures, | ||||||
|     distribution: Option<DistributionShift>, |     distribution: Option<DistributionShift>, | ||||||
|  |     time_budget: TimeBudget, | ||||||
| ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { | ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { | ||||||
|     let mut search = index.search(rtxn); |     let mut search = index.search(rtxn); | ||||||
|  |     search.time_budget(time_budget); | ||||||
|  |  | ||||||
|     if query.vector.is_some() { |     if query.vector.is_some() { | ||||||
|         features.check_vector("Passing `vector` as a query parameter")?; |         features.check_vector("Passing `vector` as a query parameter")?; | ||||||
| @@ -492,12 +498,22 @@ pub fn perform_search( | |||||||
| ) -> Result<SearchResult, MeilisearchHttpError> { | ) -> Result<SearchResult, MeilisearchHttpError> { | ||||||
|     let before_search = Instant::now(); |     let before_search = Instant::now(); | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
|  |     let time_budget = match index.search_cutoff(&rtxn)? { | ||||||
|  |         Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), | ||||||
|  |         None => TimeBudget::default(), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     let (search, is_finite_pagination, max_total_hits, offset) = |     let (search, is_finite_pagination, max_total_hits, offset) = | ||||||
|         prepare_search(index, &rtxn, &query, features, distribution)?; |         prepare_search(index, &rtxn, &query, features, distribution, time_budget)?; | ||||||
|  |  | ||||||
|     let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = |     let milli::SearchResult { | ||||||
|         match &query.hybrid { |         documents_ids, | ||||||
|  |         matching_words, | ||||||
|  |         candidates, | ||||||
|  |         document_scores, | ||||||
|  |         degraded, | ||||||
|  |         .. | ||||||
|  |     } = match &query.hybrid { | ||||||
|         Some(hybrid) => match *hybrid.semantic_ratio { |         Some(hybrid) => match *hybrid.semantic_ratio { | ||||||
|             ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?, |             ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?, | ||||||
|             ratio => search.execute_hybrid(ratio)?, |             ratio => search.execute_hybrid(ratio)?, | ||||||
| @@ -700,6 +716,7 @@ pub fn perform_search( | |||||||
|         processing_time_ms: before_search.elapsed().as_millis(), |         processing_time_ms: before_search.elapsed().as_millis(), | ||||||
|         facet_distribution, |         facet_distribution, | ||||||
|         facet_stats, |         facet_stats, | ||||||
|  |         degraded, | ||||||
|     }; |     }; | ||||||
|     Ok(result) |     Ok(result) | ||||||
| } | } | ||||||
| @@ -713,8 +730,13 @@ pub fn perform_facet_search( | |||||||
| ) -> Result<FacetSearchResult, MeilisearchHttpError> { | ) -> Result<FacetSearchResult, MeilisearchHttpError> { | ||||||
|     let before_search = Instant::now(); |     let before_search = Instant::now(); | ||||||
|     let rtxn = index.read_txn()?; |     let rtxn = index.read_txn()?; | ||||||
|  |     let time_budget = match index.search_cutoff(&rtxn)? { | ||||||
|  |         Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), | ||||||
|  |         None => TimeBudget::default(), | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; |     let (search, _, _, _) = | ||||||
|  |         prepare_search(index, &rtxn, &search_query, features, None, time_budget)?; | ||||||
|     let mut facet_search = |     let mut facet_search = | ||||||
|         SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some()); |         SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some()); | ||||||
|     if let Some(facet_query) = &facet_query { |     if let Some(facet_query) = &facet_query { | ||||||
|   | |||||||
| @@ -328,6 +328,11 @@ impl Index<'_> { | |||||||
|         self.service.patch_encoded(url, settings, self.encoder).await |         self.service.patch_encoded(url, settings, self.encoder).await | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub async fn update_settings_search_cutoff_ms(&self, settings: Value) -> (Value, StatusCode) { | ||||||
|  |         let url = format!("/indexes/{}/settings/search-cutoff-ms", urlencode(self.uid.as_ref())); | ||||||
|  |         self.service.put_encoded(url, settings, self.encoder).await | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub async fn delete_settings(&self) -> (Value, StatusCode) { |     pub async fn delete_settings(&self) -> (Value, StatusCode) { | ||||||
|         let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); |         let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); | ||||||
|         self.service.delete(url).await |         self.service.delete(url).await | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ pub use server::{default_settings, Server}; | |||||||
| pub struct Value(pub serde_json::Value); | pub struct Value(pub serde_json::Value); | ||||||
|  |  | ||||||
| impl Value { | impl Value { | ||||||
|  |     #[track_caller] | ||||||
|     pub fn uid(&self) -> u64 { |     pub fn uid(&self) -> u64 { | ||||||
|         if let Some(uid) = self["uid"].as_u64() { |         if let Some(uid) = self["uid"].as_u64() { | ||||||
|             uid |             uid | ||||||
|   | |||||||
| @@ -77,7 +77,8 @@ async fn import_dump_v1_movie_raw() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -238,7 +239,8 @@ async fn import_dump_v1_movie_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -385,7 +387,8 @@ async fn import_dump_v1_rubygems_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -518,7 +521,8 @@ async fn import_dump_v2_movie_raw() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -663,7 +667,8 @@ async fn import_dump_v2_movie_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -807,7 +812,8 @@ async fn import_dump_v2_rubygems_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -940,7 +946,8 @@ async fn import_dump_v3_movie_raw() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -1085,7 +1092,8 @@ async fn import_dump_v3_movie_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -1229,7 +1237,8 @@ async fn import_dump_v3_rubygems_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -1362,7 +1371,8 @@ async fn import_dump_v4_movie_raw() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -1507,7 +1517,8 @@ async fn import_dump_v4_movie_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -1651,7 +1662,8 @@ async fn import_dump_v4_rubygems_with_settings() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "### |     "### | ||||||
|     ); |     ); | ||||||
| @@ -1895,7 +1907,8 @@ async fn import_dump_v6_containing_experimental_features() { | |||||||
|       }, |       }, | ||||||
|       "pagination": { |       "pagination": { | ||||||
|         "maxTotalHits": 1000 |         "maxTotalHits": 1000 | ||||||
|       } |       }, | ||||||
|  |       "searchCutoffMs": null | ||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -834,6 +834,94 @@ async fn test_score_details() { | |||||||
|         .await; |         .await; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn test_degraded_score_details() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let documents = NESTED_DOCUMENTS.clone(); | ||||||
|  |  | ||||||
|  |     index.add_documents(json!(documents), None).await; | ||||||
|  |     // We can't really use anything else than 0ms here; otherwise, the test will get flaky. | ||||||
|  |     let (res, _code) = index.update_settings(json!({ "searchCutoffMs": 0 })).await; | ||||||
|  |     index.wait_task(res.uid()).await; | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .search( | ||||||
|  |             json!({ | ||||||
|  |                 "q": "b", | ||||||
|  |                 "attributesToRetrieve": ["doggos.name", "cattos"], | ||||||
|  |                 "showRankingScoreDetails": true, | ||||||
|  |             }), | ||||||
|  |             |response, code| { | ||||||
|  |                 meili_snap::snapshot!(code, @"200 OK"); | ||||||
|  |                 meili_snap::snapshot!(meili_snap::json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" | ||||||
|  |                 { | ||||||
|  |                   "hits": [ | ||||||
|  |                     { | ||||||
|  |                       "doggos": [ | ||||||
|  |                         { | ||||||
|  |                           "name": "bobby" | ||||||
|  |                         }, | ||||||
|  |                         { | ||||||
|  |                           "name": "buddy" | ||||||
|  |                         } | ||||||
|  |                       ], | ||||||
|  |                       "cattos": "pésti", | ||||||
|  |                       "_rankingScoreDetails": { | ||||||
|  |                         "skipped": { | ||||||
|  |                           "order": 0 | ||||||
|  |                         } | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     { | ||||||
|  |                       "doggos": [ | ||||||
|  |                         { | ||||||
|  |                           "name": "gros bill" | ||||||
|  |                         } | ||||||
|  |                       ], | ||||||
|  |                       "cattos": [ | ||||||
|  |                         "simba", | ||||||
|  |                         "pestiféré" | ||||||
|  |                       ], | ||||||
|  |                       "_rankingScoreDetails": { | ||||||
|  |                         "skipped": { | ||||||
|  |                           "order": 0 | ||||||
|  |                         } | ||||||
|  |                       } | ||||||
|  |                     }, | ||||||
|  |                     { | ||||||
|  |                       "doggos": [ | ||||||
|  |                         { | ||||||
|  |                           "name": "turbo" | ||||||
|  |                         }, | ||||||
|  |                         { | ||||||
|  |                           "name": "fast" | ||||||
|  |                         } | ||||||
|  |                       ], | ||||||
|  |                       "cattos": [ | ||||||
|  |                         "moumoute", | ||||||
|  |                         "gomez" | ||||||
|  |                       ], | ||||||
|  |                       "_rankingScoreDetails": { | ||||||
|  |                         "skipped": { | ||||||
|  |                           "order": 0 | ||||||
|  |                         } | ||||||
|  |                       } | ||||||
|  |                     } | ||||||
|  |                   ], | ||||||
|  |                   "query": "b", | ||||||
|  |                   "processingTimeMs": "[duration]", | ||||||
|  |                   "limit": 20, | ||||||
|  |                   "offset": 0, | ||||||
|  |                   "estimatedTotalHits": 3 | ||||||
|  |                 } | ||||||
|  |                 "###); | ||||||
|  |             }, | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn experimental_feature_vector_store() { | async fn experimental_feature_vector_store() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
|   | |||||||
| @@ -337,3 +337,31 @@ async fn settings_bad_pagination() { | |||||||
|     } |     } | ||||||
|     "###); |     "###); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn settings_bad_search_cutoff_ms() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.update_settings(json!({ "searchCutoffMs": "doggo" })).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type at `.searchCutoffMs`: expected a positive integer, but found a string: `\"doggo\"`", | ||||||
|  |       "code": "invalid_settings_search_cutoff_ms", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     let (response, code) = index.update_settings_search_cutoff_ms(json!("doggo")).await; | ||||||
|  |     snapshot!(code, @"400 Bad Request"); | ||||||
|  |     snapshot!(json_string!(response), @r###" | ||||||
|  |     { | ||||||
|  |       "message": "Invalid value type: expected a positive integer, but found a string: `\"doggo\"`", | ||||||
|  |       "code": "invalid_settings_search_cutoff_ms", | ||||||
|  |       "type": "invalid_request", | ||||||
|  |       "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms" | ||||||
|  |     } | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -35,6 +35,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(| | |||||||
|             "maxTotalHits": json!(1000), |             "maxTotalHits": json!(1000), | ||||||
|         }), |         }), | ||||||
|     ); |     ); | ||||||
|  |     map.insert("search_cutoff_ms", json!(null)); | ||||||
|     map |     map | ||||||
| }); | }); | ||||||
|  |  | ||||||
| @@ -49,12 +50,12 @@ async fn get_settings_unexisting_index() { | |||||||
| async fn get_settings() { | async fn get_settings() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
|     let index = server.index("test"); |     let index = server.index("test"); | ||||||
|     index.create(None).await; |     let (response, _code) = index.create(None).await; | ||||||
|     index.wait_task(0).await; |     index.wait_task(response.uid()).await; | ||||||
|     let (response, code) = index.settings().await; |     let (response, code) = index.settings().await; | ||||||
|     assert_eq!(code, 200); |     assert_eq!(code, 200); | ||||||
|     let settings = response.as_object().unwrap(); |     let settings = response.as_object().unwrap(); | ||||||
|     assert_eq!(settings.keys().len(), 15); |     assert_eq!(settings.keys().len(), 16); | ||||||
|     assert_eq!(settings["displayedAttributes"], json!(["*"])); |     assert_eq!(settings["displayedAttributes"], json!(["*"])); | ||||||
|     assert_eq!(settings["searchableAttributes"], json!(["*"])); |     assert_eq!(settings["searchableAttributes"], json!(["*"])); | ||||||
|     assert_eq!(settings["filterableAttributes"], json!([])); |     assert_eq!(settings["filterableAttributes"], json!([])); | ||||||
| @@ -84,6 +85,7 @@ async fn get_settings() { | |||||||
|         }) |         }) | ||||||
|     ); |     ); | ||||||
|     assert_eq!(settings["proximityPrecision"], json!("byWord")); |     assert_eq!(settings["proximityPrecision"], json!("byWord")); | ||||||
|  |     assert_eq!(settings["searchCutoffMs"], json!(null)); | ||||||
| } | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| @@ -285,7 +287,8 @@ test_setting_routes!( | |||||||
|     ranking_rules put, |     ranking_rules put, | ||||||
|     synonyms put, |     synonyms put, | ||||||
|     pagination patch, |     pagination patch, | ||||||
|     faceting patch |     faceting patch, | ||||||
|  |     search_cutoff_ms put | ||||||
| ); | ); | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
|   | |||||||
| @@ -6,7 +6,7 @@ use std::time::Instant; | |||||||
| use heed::EnvOpenOptions; | use heed::EnvOpenOptions; | ||||||
| use milli::{ | use milli::{ | ||||||
|     execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, |     execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, | ||||||
|     SearchLogger, TermsMatchingStrategy, |     SearchLogger, TermsMatchingStrategy, TimeBudget, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| #[global_allocator] | #[global_allocator] | ||||||
| @@ -65,6 +65,7 @@ fn main() -> Result<(), Box<dyn Error>> { | |||||||
|                 None, |                 None, | ||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|                 logger, |                 logger, | ||||||
|  |                 TimeBudget::max(), | ||||||
|             )?; |             )?; | ||||||
|             if let Some((logger, dir)) = detailed_logger { |             if let Some((logger, dir)) = detailed_logger { | ||||||
|                 logger.finish(&mut ctx, Path::new(dir))?; |                 logger.finish(&mut ctx, Path::new(dir))?; | ||||||
|   | |||||||
| @@ -67,6 +67,7 @@ pub mod main_key { | |||||||
|     pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits"; |     pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits"; | ||||||
|     pub const PROXIMITY_PRECISION: &str = "proximity-precision"; |     pub const PROXIMITY_PRECISION: &str = "proximity-precision"; | ||||||
|     pub const EMBEDDING_CONFIGS: &str = "embedding_configs"; |     pub const EMBEDDING_CONFIGS: &str = "embedding_configs"; | ||||||
|  |     pub const SEARCH_CUTOFF: &str = "search_cutoff"; | ||||||
| } | } | ||||||
|  |  | ||||||
| pub mod db_name { | pub mod db_name { | ||||||
| @@ -1505,6 +1506,18 @@ impl Index { | |||||||
|             _ => "default".to_owned(), |             _ => "default".to_owned(), | ||||||
|         }) |         }) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { | ||||||
|  |         self.main.remap_types::<Str, BEU64>().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result<Option<u64>> { | ||||||
|  |         Ok(self.main.remap_types::<Str, BEU64>().get(rtxn, main_key::SEARCH_CUTOFF)?) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> { | ||||||
|  |         self.main.remap_key_type::<Str>().delete(wtxn, main_key::SEARCH_CUTOFF) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| @@ -2421,6 +2434,7 @@ pub(crate) mod tests { | |||||||
|             candidates: _, |             candidates: _, | ||||||
|             document_scores: _, |             document_scores: _, | ||||||
|             mut documents_ids, |             mut documents_ids, | ||||||
|  |             degraded: _, | ||||||
|         } = search.execute().unwrap(); |         } = search.execute().unwrap(); | ||||||
|         let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap(); |         let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap(); | ||||||
|         documents_ids.sort_unstable(); |         documents_ids.sort_unstable(); | ||||||
|   | |||||||
| @@ -30,6 +30,7 @@ pub mod snapshot_tests; | |||||||
|  |  | ||||||
| use std::collections::{BTreeMap, HashMap}; | use std::collections::{BTreeMap, HashMap}; | ||||||
| use std::convert::{TryFrom, TryInto}; | use std::convert::{TryFrom, TryInto}; | ||||||
|  | use std::fmt; | ||||||
| use std::hash::BuildHasherDefault; | use std::hash::BuildHasherDefault; | ||||||
|  |  | ||||||
| use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; | use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; | ||||||
| @@ -104,6 +105,73 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2; | |||||||
|  |  | ||||||
| pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1; | pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1; | ||||||
|  |  | ||||||
/// A wall-clock allowance measured from the moment the value is created.
///
/// The clock starts in [`TimeBudget::new`]; [`TimeBudget::exceeded`] reports
/// whether more than `budget` has elapsed since then.
#[derive(Clone)]
pub struct TimeBudget {
    /// Instant at which this budget was created.
    started_at: std::time::Instant,
    /// Total amount of time allowed, counted from `started_at`.
    budget: std::time::Duration,

    /// When testing the time budget, ensuring we did more than one iteration of the bucket sort can be useful.
    /// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`.
    #[cfg(test)]
    stop_after: Option<(std::sync::Arc<std::sync::atomic::AtomicUsize>, usize)>,
}

impl fmt::Debug for TimeBudget {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `Duration - Duration` panics on underflow, which is exactly what happens
        // once the budget has been exceeded; saturate to zero instead so that
        // debug-printing an exhausted budget never panics.
        let left = self.budget.saturating_sub(self.started_at.elapsed());
        f.debug_struct("TimeBudget")
            .field("started_at", &self.started_at)
            .field("budget", &self.budget)
            .field("left", &left)
            .finish()
    }
}

impl Default for TimeBudget {
    /// A budget of 150 milliseconds starting now.
    fn default() -> Self {
        Self::new(std::time::Duration::from_millis(150))
    }
}

impl TimeBudget {
    /// Creates a budget of `budget` whose clock starts immediately.
    pub fn new(budget: std::time::Duration) -> Self {
        Self {
            started_at: std::time::Instant::now(),
            budget,

            #[cfg(test)]
            stop_after: None,
        }
    }

    /// Creates a budget of `u64::MAX` seconds, i.e. one that is never exceeded in practice.
    pub fn max() -> Self {
        Self::new(std::time::Duration::from_secs(u64::MAX))
    }

    /// Test-only: makes [`TimeBudget::exceeded`] ignore the clock and return
    /// `true` once it has already been called `stop_after` times.
    #[cfg(test)]
    pub fn with_stop_after(mut self, stop_after: usize) -> Self {
        use std::sync::atomic::AtomicUsize;
        use std::sync::Arc;

        self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after));
        self
    }

    /// Returns `true` when strictly more than the budget has elapsed since creation.
    pub fn exceeded(&self) -> bool {
        #[cfg(test)]
        if let Some((calls, stop_after)) = &self.stop_after {
            // If a number of calls has been specified then the time budget is
            // ignored entirely: only the call counter decides.
            let previous_calls = calls.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            return previous_calls >= *stop_after;
        }

        self.started_at.elapsed() > self.budget
    }
}
|  |  | ||||||
| // Convert an absolute word position into a relative position. | // Convert an absolute word position into a relative position. | ||||||
| // Return the field id of the attribute related to the absolute position | // Return the field id of the attribute related to the absolute position | ||||||
| // and the relative position in the attribute. | // and the relative position in the attribute. | ||||||
|   | |||||||
| @@ -17,6 +17,9 @@ pub enum ScoreDetails { | |||||||
|     Sort(Sort), |     Sort(Sort), | ||||||
|     Vector(Vector), |     Vector(Vector), | ||||||
|     GeoSort(GeoSort), |     GeoSort(GeoSort), | ||||||
|  |  | ||||||
|  |     /// Returned when we don't have the time to finish applying all the subsequent ranking-rules | ||||||
|  |     Skipped, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Clone, Copy)] | #[derive(Clone, Copy)] | ||||||
| @@ -50,6 +53,7 @@ impl ScoreDetails { | |||||||
|             ScoreDetails::Sort(_) => None, |             ScoreDetails::Sort(_) => None, | ||||||
|             ScoreDetails::GeoSort(_) => None, |             ScoreDetails::GeoSort(_) => None, | ||||||
|             ScoreDetails::Vector(_) => None, |             ScoreDetails::Vector(_) => None, | ||||||
|  |             ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -97,6 +101,7 @@ impl ScoreDetails { | |||||||
|             ScoreDetails::Vector(vector) => RankOrValue::Score( |             ScoreDetails::Vector(vector) => RankOrValue::Score( | ||||||
|                 vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64), |                 vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64), | ||||||
|             ), |             ), | ||||||
|  |             ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -256,6 +261,11 @@ impl ScoreDetails { | |||||||
|                     details_map.insert(vector, details); |                     details_map.insert(vector, details); | ||||||
|                     order += 1; |                     order += 1; | ||||||
|                 } |                 } | ||||||
|  |                 ScoreDetails::Skipped => { | ||||||
|  |                     details_map | ||||||
|  |                         .insert("skipped".to_string(), serde_json::json!({ "order": order })); | ||||||
|  |                     order += 1; | ||||||
|  |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         details_map |         details_map | ||||||
|   | |||||||
| @@ -10,6 +10,7 @@ struct ScoreWithRatioResult { | |||||||
|     matching_words: MatchingWords, |     matching_words: MatchingWords, | ||||||
|     candidates: RoaringBitmap, |     candidates: RoaringBitmap, | ||||||
|     document_scores: Vec<(u32, ScoreWithRatio)>, |     document_scores: Vec<(u32, ScoreWithRatio)>, | ||||||
|  |     degraded: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| type ScoreWithRatio = (Vec<ScoreDetails>, f32); | type ScoreWithRatio = (Vec<ScoreDetails>, f32); | ||||||
| @@ -49,8 +50,12 @@ fn compare_scores( | |||||||
|                     order => return order, |                     order => return order, | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|             (Some(ScoreValue::Score(_)), Some(_)) => return Ordering::Greater, |             (Some(ScoreValue::Score(x)), Some(_)) => { | ||||||
|             (Some(_), Some(ScoreValue::Score(_))) => return Ordering::Less, |                 return if x == 0. { Ordering::Less } else { Ordering::Greater } | ||||||
|  |             } | ||||||
|  |             (Some(_), Some(ScoreValue::Score(x))) => { | ||||||
|  |                 return if x == 0. { Ordering::Greater } else { Ordering::Less } | ||||||
|  |             } | ||||||
|             // if we have this, we're bad |             // if we have this, we're bad | ||||||
|             (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) |             (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) | ||||||
|             | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { |             | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { | ||||||
| @@ -72,6 +77,7 @@ impl ScoreWithRatioResult { | |||||||
|             matching_words: results.matching_words, |             matching_words: results.matching_words, | ||||||
|             candidates: results.candidates, |             candidates: results.candidates, | ||||||
|             document_scores, |             document_scores, | ||||||
|  |             degraded: results.degraded, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -106,6 +112,7 @@ impl ScoreWithRatioResult { | |||||||
|             candidates: left.candidates | right.candidates, |             candidates: left.candidates | right.candidates, | ||||||
|             documents_ids, |             documents_ids, | ||||||
|             document_scores, |             document_scores, | ||||||
|  |             degraded: left.degraded | right.degraded, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -131,6 +138,7 @@ impl<'a> Search<'a> { | |||||||
|             index: self.index, |             index: self.index, | ||||||
|             distribution_shift: self.distribution_shift, |             distribution_shift: self.distribution_shift, | ||||||
|             embedder_name: self.embedder_name.clone(), |             embedder_name: self.embedder_name.clone(), | ||||||
|  |             time_budget: self.time_budget.clone(), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         let vector_query = search.vector.take(); |         let vector_query = search.vector.take(); | ||||||
|   | |||||||
| @@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy}; | |||||||
| use crate::vector::DistributionShift; | use crate::vector::DistributionShift; | ||||||
| use crate::{ | use crate::{ | ||||||
|     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, |     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, | ||||||
|     SearchContext, |     SearchContext, TimeBudget, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Building these factories is not free. | // Building these factories is not free. | ||||||
| @@ -43,6 +43,8 @@ pub struct Search<'a> { | |||||||
|     index: &'a Index, |     index: &'a Index, | ||||||
|     distribution_shift: Option<DistributionShift>, |     distribution_shift: Option<DistributionShift>, | ||||||
|     embedder_name: Option<String>, |     embedder_name: Option<String>, | ||||||
|  |  | ||||||
|  |     time_budget: TimeBudget, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a> Search<'a> { | impl<'a> Search<'a> { | ||||||
| @@ -64,6 +66,7 @@ impl<'a> Search<'a> { | |||||||
|             index, |             index, | ||||||
|             distribution_shift: None, |             distribution_shift: None, | ||||||
|             embedder_name: None, |             embedder_name: None, | ||||||
|  |             time_budget: TimeBudget::max(), | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -143,6 +146,11 @@ impl<'a> Search<'a> { | |||||||
|         self |         self | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Sets the time budget of this search and returns `&mut self` so calls can be chained. | ||||||
|  |     /// | ||||||
|  |     /// `Search::new` initialises the budget with `TimeBudget::max()`; the value stored here is | ||||||
|  |     /// cloned and handed to `execute_search` / `execute_vector_search` when the search is executed. | ||||||
|  |     pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { | ||||||
|  |         self.time_budget = time_budget; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { |     pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> { | ||||||
|         if has_vector_search { |         if has_vector_search { | ||||||
|             let ctx = SearchContext::new(self.index, self.rtxn); |             let ctx = SearchContext::new(self.index, self.rtxn); | ||||||
| @@ -169,8 +177,13 @@ impl<'a> Search<'a> { | |||||||
|         } |         } | ||||||
|  |  | ||||||
|         let universe = filtered_universe(&ctx, &self.filter)?; |         let universe = filtered_universe(&ctx, &self.filter)?; | ||||||
|         let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = |         let PartialSearchResult { | ||||||
|             match self.vector.as_ref() { |             located_query_terms, | ||||||
|  |             candidates, | ||||||
|  |             documents_ids, | ||||||
|  |             document_scores, | ||||||
|  |             degraded, | ||||||
|  |         } = match self.vector.as_ref() { | ||||||
|             Some(vector) => execute_vector_search( |             Some(vector) => execute_vector_search( | ||||||
|                 &mut ctx, |                 &mut ctx, | ||||||
|                 vector, |                 vector, | ||||||
| @@ -182,6 +195,7 @@ impl<'a> Search<'a> { | |||||||
|                 self.limit, |                 self.limit, | ||||||
|                 self.distribution_shift, |                 self.distribution_shift, | ||||||
|                 embedder_name, |                 embedder_name, | ||||||
|  |                 self.time_budget.clone(), | ||||||
|             )?, |             )?, | ||||||
|             None => execute_search( |             None => execute_search( | ||||||
|                 &mut ctx, |                 &mut ctx, | ||||||
| @@ -197,6 +211,7 @@ impl<'a> Search<'a> { | |||||||
|                 Some(self.words_limit), |                 Some(self.words_limit), | ||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|                 &mut DefaultSearchLogger, |                 &mut DefaultSearchLogger, | ||||||
|  |                 self.time_budget.clone(), | ||||||
|             )?, |             )?, | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
| @@ -206,7 +221,7 @@ impl<'a> Search<'a> { | |||||||
|             None => MatchingWords::default(), |             None => MatchingWords::default(), | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
|         Ok(SearchResult { matching_words, candidates, document_scores, documents_ids }) |         Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded }) | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             index: _, |             index: _, | ||||||
|             distribution_shift, |             distribution_shift, | ||||||
|             embedder_name, |             embedder_name, | ||||||
|  |             time_budget, | ||||||
|         } = self; |         } = self; | ||||||
|         f.debug_struct("Search") |         f.debug_struct("Search") | ||||||
|             .field("query", query) |             .field("query", query) | ||||||
| @@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> { | |||||||
|             .field("words_limit", words_limit) |             .field("words_limit", words_limit) | ||||||
|             .field("distribution_shift", distribution_shift) |             .field("distribution_shift", distribution_shift) | ||||||
|             .field("embedder_name", embedder_name) |             .field("embedder_name", embedder_name) | ||||||
|  |             .field("time_budget", time_budget) | ||||||
|             .finish() |             .finish() | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -254,6 +271,7 @@ pub struct SearchResult { | |||||||
|     pub candidates: RoaringBitmap, |     pub candidates: RoaringBitmap, | ||||||
|     pub documents_ids: Vec<DocumentId>, |     pub documents_ids: Vec<DocumentId>, | ||||||
|     pub document_scores: Vec<Vec<ScoreDetails>>, |     pub document_scores: Vec<Vec<ScoreDetails>>, | ||||||
|  |     pub degraded: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||||
|   | |||||||
| @@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; | |||||||
| use super::SearchContext; | use super::SearchContext; | ||||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
| use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; | use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; | ||||||
| use crate::Result; | use crate::{Result, TimeBudget}; | ||||||
|  |  | ||||||
|  | /// Result returned by `bucket_sort`. | ||||||
| pub struct BucketSortOutput { | pub struct BucketSortOutput { | ||||||
|     pub docids: Vec<u32>, |     pub docids: Vec<u32>, | ||||||
|     pub scores: Vec<Vec<ScoreDetails>>, |     pub scores: Vec<Vec<ScoreDetails>>, | ||||||
|     pub all_candidates: RoaringBitmap, |     pub all_candidates: RoaringBitmap, | ||||||
|  |  | ||||||
|  |     /// `true` when `bucket_sort` returned early because `time_budget.exceeded()` was hit; | ||||||
|  |     /// every other return path of `bucket_sort` visible in this change sets it to `false`. | ||||||
|  |     pub degraded: bool, | ||||||
| } | } | ||||||
|  |  | ||||||
| // TODO: would probably be good to regroup some of these inside of a struct? | // TODO: would probably be good to regroup some of these inside of a struct? | ||||||
| @@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     length: usize, |     length: usize, | ||||||
|     scoring_strategy: ScoringStrategy, |     scoring_strategy: ScoringStrategy, | ||||||
|     logger: &mut dyn SearchLogger<Q>, |     logger: &mut dyn SearchLogger<Q>, | ||||||
|  |     time_budget: TimeBudget, | ||||||
| ) -> Result<BucketSortOutput> { | ) -> Result<BucketSortOutput> { | ||||||
|     logger.initial_query(query); |     logger.initial_query(query); | ||||||
|     logger.ranking_rules(&ranking_rules); |     logger.ranking_rules(&ranking_rules); | ||||||
| @@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|             docids: vec![], |             docids: vec![], | ||||||
|             scores: vec![], |             scores: vec![], | ||||||
|             all_candidates: universe.clone(), |             all_candidates: universe.clone(), | ||||||
|  |             degraded: false, | ||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
|     if ranking_rules.is_empty() { |     if ranking_rules.is_empty() { | ||||||
| @@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|                 scores: vec![Default::default(); results.len()], |                 scores: vec![Default::default(); results.len()], | ||||||
|                 docids: results, |                 docids: results, | ||||||
|                 all_candidates, |                 all_candidates, | ||||||
|  |                 degraded: false, | ||||||
|             }); |             }); | ||||||
|         } else { |         } else { | ||||||
|             let docids: Vec<u32> = universe.iter().skip(from).take(length).collect(); |             let docids: Vec<u32> = universe.iter().skip(from).take(length).collect(); | ||||||
| @@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|                 scores: vec![Default::default(); docids.len()], |                 scores: vec![Default::default(); docids.len()], | ||||||
|                 docids, |                 docids, | ||||||
|                 all_candidates: universe.clone(), |                 all_candidates: universe.clone(), | ||||||
|  |                 degraded: false, | ||||||
|             }); |             }); | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
| @@ -154,6 +160,28 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     while valid_docids.len() < length { |     while valid_docids.len() < length { | ||||||
|  |         if time_budget.exceeded() { | ||||||
|  |             loop { | ||||||
|  |                 let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); | ||||||
|  |                 ranking_rule_scores.push(ScoreDetails::Skipped); | ||||||
|  |                 maybe_add_to_results!(bucket); | ||||||
|  |                 ranking_rule_scores.pop(); | ||||||
|  |  | ||||||
|  |                 if cur_ranking_rule_index == 0 { | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 back!(); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             return Ok(BucketSortOutput { | ||||||
|  |                 scores: valid_scores, | ||||||
|  |                 docids: valid_docids, | ||||||
|  |                 all_candidates, | ||||||
|  |                 degraded: true, | ||||||
|  |             }); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         // The universe for this bucket is zero, so we don't need to sort |         // The universe for this bucket is zero, so we don't need to sort | ||||||
|         // anything, just go back to the parent ranking rule. |         // anything, just go back to the parent ranking rule. | ||||||
|         if ranking_rule_universes[cur_ranking_rule_index].is_empty() |         if ranking_rule_universes[cur_ranking_rule_index].is_empty() | ||||||
| @@ -219,7 +247,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | |||||||
|         )?; |         )?; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates }) |     Ok(BucketSortOutput { | ||||||
|  |         docids: valid_docids, | ||||||
|  |         scores: valid_scores, | ||||||
|  |         all_candidates, | ||||||
|  |         degraded: false, | ||||||
|  |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` | /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` | ||||||
|   | |||||||
| @@ -502,7 +502,7 @@ mod tests { | |||||||
|  |  | ||||||
|     use super::*; |     use super::*; | ||||||
|     use crate::index::tests::TempIndex; |     use crate::index::tests::TempIndex; | ||||||
|     use crate::{execute_search, filtered_universe, SearchContext}; |     use crate::{execute_search, filtered_universe, SearchContext, TimeBudget}; | ||||||
|  |  | ||||||
|     impl<'a> MatcherBuilder<'a> { |     impl<'a> MatcherBuilder<'a> { | ||||||
|         fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self { |         fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self { | ||||||
| @@ -522,6 +522,7 @@ mod tests { | |||||||
|                 Some(10), |                 Some(10), | ||||||
|                 &mut crate::DefaultSearchLogger, |                 &mut crate::DefaultSearchLogger, | ||||||
|                 &mut crate::DefaultSearchLogger, |                 &mut crate::DefaultSearchLogger, | ||||||
|  |                 TimeBudget::max(), | ||||||
|             ) |             ) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|  |  | ||||||
|   | |||||||
| @@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy}; | |||||||
| use crate::search::new::distinct::apply_distinct_rule; | use crate::search::new::distinct::apply_distinct_rule; | ||||||
| use crate::vector::DistributionShift; | use crate::vector::DistributionShift; | ||||||
| use crate::{ | use crate::{ | ||||||
|     AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, |     AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget, | ||||||
|  |     UserError, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /// A structure used throughout the execution of a search query. | /// A structure used throughout the execution of a search query. | ||||||
| @@ -518,6 +519,7 @@ pub fn execute_vector_search( | |||||||
|     length: usize, |     length: usize, | ||||||
|     distribution_shift: Option<DistributionShift>, |     distribution_shift: Option<DistributionShift>, | ||||||
|     embedder_name: &str, |     embedder_name: &str, | ||||||
|  |     time_budget: TimeBudget, | ||||||
| ) -> Result<PartialSearchResult> { | ) -> Result<PartialSearchResult> { | ||||||
|     check_sort_criteria(ctx, sort_criteria.as_ref())?; |     check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
| @@ -537,7 +539,7 @@ pub fn execute_vector_search( | |||||||
|     let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> = |     let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> = | ||||||
|         &mut placeholder_search_logger; |         &mut placeholder_search_logger; | ||||||
|  |  | ||||||
|     let BucketSortOutput { docids, scores, all_candidates } = bucket_sort( |     let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort( | ||||||
|         ctx, |         ctx, | ||||||
|         ranking_rules, |         ranking_rules, | ||||||
|         &PlaceholderQuery, |         &PlaceholderQuery, | ||||||
| @@ -546,6 +548,7 @@ pub fn execute_vector_search( | |||||||
|         length, |         length, | ||||||
|         scoring_strategy, |         scoring_strategy, | ||||||
|         placeholder_search_logger, |         placeholder_search_logger, | ||||||
|  |         time_budget, | ||||||
|     )?; |     )?; | ||||||
|  |  | ||||||
|     Ok(PartialSearchResult { |     Ok(PartialSearchResult { | ||||||
| @@ -553,6 +556,7 @@ pub fn execute_vector_search( | |||||||
|         document_scores: scores, |         document_scores: scores, | ||||||
|         documents_ids: docids, |         documents_ids: docids, | ||||||
|         located_query_terms: None, |         located_query_terms: None, | ||||||
|  |         degraded, | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -572,6 +576,7 @@ pub fn execute_search( | |||||||
|     words_limit: Option<usize>, |     words_limit: Option<usize>, | ||||||
|     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, |     placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>, | ||||||
|     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, |     query_graph_logger: &mut dyn SearchLogger<QueryGraph>, | ||||||
|  |     time_budget: TimeBudget, | ||||||
| ) -> Result<PartialSearchResult> { | ) -> Result<PartialSearchResult> { | ||||||
|     check_sort_criteria(ctx, sort_criteria.as_ref())?; |     check_sort_criteria(ctx, sort_criteria.as_ref())?; | ||||||
|  |  | ||||||
| @@ -648,6 +653,7 @@ pub fn execute_search( | |||||||
|             length, |             length, | ||||||
|             scoring_strategy, |             scoring_strategy, | ||||||
|             query_graph_logger, |             query_graph_logger, | ||||||
|  |             time_budget, | ||||||
|         )? |         )? | ||||||
|     } else { |     } else { | ||||||
|         let ranking_rules = |         let ranking_rules = | ||||||
| @@ -661,10 +667,11 @@ pub fn execute_search( | |||||||
|             length, |             length, | ||||||
|             scoring_strategy, |             scoring_strategy, | ||||||
|             placeholder_search_logger, |             placeholder_search_logger, | ||||||
|  |             time_budget, | ||||||
|         )? |         )? | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output; |     let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output; | ||||||
|     let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; |     let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; | ||||||
|  |  | ||||||
|     // The candidates is the universe unless the exhaustive number of hits |     // The candidates is the universe unless the exhaustive number of hits | ||||||
| @@ -682,6 +689,7 @@ pub fn execute_search( | |||||||
|         document_scores: scores, |         document_scores: scores, | ||||||
|         documents_ids: docids, |         documents_ids: docids, | ||||||
|         located_query_terms, |         located_query_terms, | ||||||
|  |         degraded, | ||||||
|     }) |     }) | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -742,4 +750,6 @@ pub struct PartialSearchResult { | |||||||
|     pub candidates: RoaringBitmap, |     pub candidates: RoaringBitmap, | ||||||
|     pub documents_ids: Vec<DocumentId>, |     pub documents_ids: Vec<DocumentId>, | ||||||
|     pub document_scores: Vec<Vec<ScoreDetails>>, |     pub document_scores: Vec<Vec<ScoreDetails>>, | ||||||
|  |  | ||||||
|  |     pub degraded: bool, | ||||||
| } | } | ||||||
|   | |||||||
							
								
								
									
										429
									
								
								milli/src/search/new/tests/cutoff.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										429
									
								
								milli/src/search/new/tests/cutoff.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,429 @@ | |||||||
|  | //! This module tests the search cutoff and ensures a few things: | ||||||
|  | //! 1. A basic test works and marks the search as degraded | ||||||
|  | //! 2. A test that ensures the filters are effectively applied even with a cutoff of 0 | ||||||
|  | //! 3. A test that ensures the cutoff works well with the ranking scores | ||||||
|  |  | ||||||
|  | use std::time::Duration; | ||||||
|  |  | ||||||
|  | use big_s::S; | ||||||
|  | use maplit::hashset; | ||||||
|  | use meili_snap::snapshot; | ||||||
|  |  | ||||||
|  | use crate::index::tests::TempIndex; | ||||||
|  | use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
|  | use crate::{Criterion, Filter, Search, TimeBudget}; | ||||||
|  |  | ||||||
|  | /// Builds the temporary index shared by the cutoff tests. | ||||||
|  | /// | ||||||
|  | /// Settings: primary key `id`, `text` as the only searchable field, `id` filterable, | ||||||
|  | /// and the ranking rules `Words` then `Typo`. Five documents (ids 4 down to 0) are added. | ||||||
|  | fn create_index() -> TempIndex { | ||||||
|  |     let index = TempIndex::new(); | ||||||
|  |  | ||||||
|  |     index | ||||||
|  |         .update_settings(|s| { | ||||||
|  |             s.set_primary_key("id".to_owned()); | ||||||
|  |             s.set_searchable_fields(vec!["text".to_owned()]); | ||||||
|  |             s.set_filterable_fields(hashset! { S("id") }); | ||||||
|  |             s.set_criteria(vec![Criterion::Words, Criterion::Typo]); | ||||||
|  |         }) | ||||||
|  |         .unwrap(); | ||||||
|  |  | ||||||
|  |     // Insert the documents in reverse ID order so that results which were really sorted can be told apart from results that merely kept the insertion order. | ||||||
|  |     index | ||||||
|  |         .add_documents(documents!([ | ||||||
|  |             { | ||||||
|  |                 "id": 4, | ||||||
|  |                 "text": "hella puppo kefir", | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "id": 3, | ||||||
|  |                 "text": "hella puppy kefir", | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "id": 2, | ||||||
|  |                 "text": "hello", | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "id": 1, | ||||||
|  |                 "text": "hello puppy", | ||||||
|  |             }, | ||||||
|  |             { | ||||||
|  |                 "id": 0, | ||||||
|  |                 "text": "hello puppy kefir", | ||||||
|  |             }, | ||||||
|  |         ])) | ||||||
|  |         .unwrap(); | ||||||
|  |     index | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// A search run with a zero-millisecond time budget must be reported as degraded. | ||||||
|  | #[test] | ||||||
|  | fn basic_degraded_search() { | ||||||
|  |     let index = create_index(); | ||||||
|  |     let rtxn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     let mut search = Search::new(&rtxn, &index); | ||||||
|  |     search.query("hello puppy kefir"); | ||||||
|  |     search.limit(3); | ||||||
|  |     // A budget of 0 ms: the cutoff should trigger as soon as it is checked. | ||||||
|  |     search.time_budget(TimeBudget::new(Duration::from_millis(0))); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     assert!(result.degraded); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Even when the time budget is zero and the search is degraded, the filter must still be | ||||||
|  | /// applied: only documents matching `id > 2` may appear in the candidates and in the results. | ||||||
|  | #[test] | ||||||
|  | fn degraded_search_cannot_skip_filter() { | ||||||
|  |     let index = create_index(); | ||||||
|  |     let rtxn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     let mut search = Search::new(&rtxn, &index); | ||||||
|  |     search.query("hello puppy kefir"); | ||||||
|  |     search.limit(100); | ||||||
|  |     search.time_budget(TimeBudget::new(Duration::from_millis(0))); | ||||||
|  |     let filter_condition = Filter::from_str("id > 2").unwrap().unwrap(); | ||||||
|  |     search.filter(filter_condition); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     assert!(result.degraded); | ||||||
|  |     // NOTE(review): the snapshot presumably shows internal document ids, 0 and 1 being the | ||||||
|  |     // first two inserted documents (external ids 4 and 3) — confirm. | ||||||
|  |     snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###" | ||||||
|  |     RoaringBitmap<[0, 1]> | ||||||
|  |     [0, 1] | ||||||
|  |     "###); | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[test] | ||||||
|  | #[allow(clippy::format_collect)] // the test is already quite big | ||||||
|  | fn degraded_search_and_score_details() { | ||||||
|  |     let index = create_index(); | ||||||
|  |     let rtxn = index.read_txn().unwrap(); | ||||||
|  |  | ||||||
|  |     let mut search = Search::new(&rtxn, &index); | ||||||
|  |     search.query("hello puppy kefir"); | ||||||
|  |     search.limit(4); | ||||||
|  |     search.scoring_strategy(ScoringStrategy::Detailed); | ||||||
|  |     search.time_budget(TimeBudget::max()); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###" | ||||||
|  |     IDs: [4, 1, 0, 3] | ||||||
|  |     Scores: 1.0000 0.9167 0.8333 0.6667  | ||||||
|  |     Score Details: | ||||||
|  |     [ | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 0, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 1, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 2, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 2, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 0, | ||||||
|  |                     max_typo_count: 2, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket. | ||||||
|  |     search.time_budget(TimeBudget::max().with_stop_after(1)); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###" | ||||||
|  |     IDs: [0, 1, 4, 2] | ||||||
|  |     Scores: 0.6667 0.6667 0.6667 0.0000  | ||||||
|  |     Score Details: | ||||||
|  |     [ | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Do TWO loop iterations. The first document should be entirely sorted | ||||||
|  |     search.time_budget(TimeBudget::max().with_stop_after(2)); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###" | ||||||
|  |     IDs: [4, 0, 1, 2] | ||||||
|  |     Scores: 1.0000 0.6667 0.6667 0.0000  | ||||||
|  |     Score Details: | ||||||
|  |     [ | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 0, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Do THREE loop iterations. The second document should be entirely sorted as well | ||||||
|  |     search.time_budget(TimeBudget::max().with_stop_after(3)); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###" | ||||||
|  |     IDs: [4, 1, 0, 2] | ||||||
|  |     Scores: 1.0000 0.9167 0.6667 0.0000  | ||||||
|  |     Score Details: | ||||||
|  |     [ | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 0, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 1, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // Do FOUR loop iterations. The third document should be entirely sorted as well | ||||||
|  |     // The words bucket has still not progressed, thus the last document doesn't have any info yet. | ||||||
|  |     search.time_budget(TimeBudget::max().with_stop_after(4)); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###" | ||||||
|  |     IDs: [4, 1, 0, 2] | ||||||
|  |     Scores: 1.0000 0.9167 0.8333 0.0000  | ||||||
|  |     Score Details: | ||||||
|  |     [ | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 0, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 1, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 2, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  |  | ||||||
|  |     // After SIX loop iterations. The words ranking rule gave us a new bucket. | ||||||
|  |     // Since we reached the limit we were able to early exit without checking the typo ranking rule. | ||||||
|  |     search.time_budget(TimeBudget::max().with_stop_after(6)); | ||||||
|  |  | ||||||
|  |     let result = search.execute().unwrap(); | ||||||
|  |     snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###" | ||||||
|  |     IDs: [4, 1, 0, 3] | ||||||
|  |     Scores: 1.0000 0.9167 0.8333 0.3333  | ||||||
|  |     Score Details: | ||||||
|  |     [ | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 0, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 1, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 3, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Typo( | ||||||
|  |                 Typo { | ||||||
|  |                     typo_count: 2, | ||||||
|  |                     max_typo_count: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |         ], | ||||||
|  |         [ | ||||||
|  |             Words( | ||||||
|  |                 Words { | ||||||
|  |                     matching_words: 2, | ||||||
|  |                     max_matching_words: 3, | ||||||
|  |                 }, | ||||||
|  |             ), | ||||||
|  |             Skipped, | ||||||
|  |         ], | ||||||
|  |     ] | ||||||
|  |     "###); | ||||||
|  | } | ||||||
| @@ -1,5 +1,6 @@ | |||||||
| pub mod attribute_fid; | pub mod attribute_fid; | ||||||
| pub mod attribute_position; | pub mod attribute_position; | ||||||
|  | pub mod cutoff; | ||||||
| pub mod distinct; | pub mod distinct; | ||||||
| pub mod exactness; | pub mod exactness; | ||||||
| pub mod geo_sort; | pub mod geo_sort; | ||||||
|   | |||||||
| @@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> { | |||||||
|     pagination_max_total_hits: Setting<usize>, |     pagination_max_total_hits: Setting<usize>, | ||||||
|     proximity_precision: Setting<ProximityPrecision>, |     proximity_precision: Setting<ProximityPrecision>, | ||||||
|     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>, |     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>, | ||||||
|  |     search_cutoff: Setting<u64>, | ||||||
| } | } | ||||||
|  |  | ||||||
| impl<'a, 't, 'i> Settings<'a, 't, 'i> { | impl<'a, 't, 'i> Settings<'a, 't, 'i> { | ||||||
| @@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|             pagination_max_total_hits: Setting::NotSet, |             pagination_max_total_hits: Setting::NotSet, | ||||||
|             proximity_precision: Setting::NotSet, |             proximity_precision: Setting::NotSet, | ||||||
|             embedder_settings: Setting::NotSet, |             embedder_settings: Setting::NotSet, | ||||||
|  |             search_cutoff: Setting::NotSet, | ||||||
|             indexer_config, |             indexer_config, | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         self.embedder_settings = Setting::Reset; |         self.embedder_settings = Setting::Reset; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Stages `value` as the new search cutoff by storing `Setting::Set(value)` in this builder. | ||||||
|  |     /// | ||||||
|  |     /// Nothing is written to the index here; the staged value is applied by `update_search_cutoff`. | ||||||
|  |     /// NOTE(review): the unit of `value` is presumably milliseconds — confirm against the index API. | ||||||
|  |     pub fn set_search_cutoff(&mut self, value: u64) { | ||||||
|  |         self.search_cutoff = Setting::Set(value); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Stages a reset of the search cutoff by storing `Setting::Reset` in this builder. | ||||||
|  |     /// | ||||||
|  |     /// Nothing is written to the index here; the reset is applied by `update_search_cutoff`, | ||||||
|  |     /// which deletes the stored value. | ||||||
|  |     pub fn reset_search_cutoff(&mut self) { | ||||||
|  |         self.search_cutoff = Setting::Reset; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     #[tracing::instrument( |     #[tracing::instrument( | ||||||
|         level = "trace" |         level = "trace" | ||||||
|         skip(self, progress_callback, should_abort, old_fields_ids_map), |         skip(self, progress_callback, should_abort, old_fields_ids_map), | ||||||
| @@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         Ok(update) |         Ok(update) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     /// Applies the staged `search_cutoff` setting to the index. | ||||||
|  |     /// | ||||||
|  |     /// - `Setting::NotSet`: nothing is touched and `Ok(false)` is returned. | ||||||
|  |     /// - `Setting::Reset`: the stored value is deleted and the result of | ||||||
|  |     ///   `delete_search_cutoff` is returned as-is. | ||||||
|  |     /// - `Setting::Set(v)`: `v` is written only when it differs from the value currently | ||||||
|  |     ///   stored; returns `Ok(true)` when a write happened, `Ok(false)` otherwise. | ||||||
|  |     /// | ||||||
|  |     /// Any error raised by the index accessors is propagated to the caller. | ||||||
|  |     fn update_search_cutoff(&mut self) -> Result<bool> { | ||||||
|  |         match self.search_cutoff { | ||||||
|  |             Setting::NotSet => Ok(false), | ||||||
|  |             Setting::Reset => Ok(self.index.delete_search_cutoff(self.wtxn)?), | ||||||
|  |             Setting::Set(requested) => { | ||||||
|  |                 let current = self.index.search_cutoff(self.wtxn)?; | ||||||
|  |                 // Skip the write when the stored value already matches the requested one. | ||||||
|  |                 if current == Some(requested) { | ||||||
|  |                     return Ok(false); | ||||||
|  |                 } | ||||||
|  |                 self.index.put_search_cutoff(self.wtxn, requested)?; | ||||||
|  |                 Ok(true) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()> |     pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()> | ||||||
|     where |     where | ||||||
|         FP: Fn(UpdateIndexingStep) + Sync, |         FP: Fn(UpdateIndexingStep) + Sync, | ||||||
| @@ -1079,6 +1107,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage |         // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage | ||||||
|         let embedding_configs_updated = self.update_embedding_configs()?; |         let embedding_configs_updated = self.update_embedding_configs()?; | ||||||
|  |  | ||||||
|  |         // updating the search cutoff never triggers re-indexing | ||||||
|  |         self.update_search_cutoff()?; | ||||||
|  |  | ||||||
|         if stop_words_updated |         if stop_words_updated | ||||||
|             || non_separator_tokens_updated |             || non_separator_tokens_updated | ||||||
|             || separator_tokens_updated |             || separator_tokens_updated | ||||||
| @@ -2035,6 +2066,7 @@ mod tests { | |||||||
|                     pagination_max_total_hits, |                     pagination_max_total_hits, | ||||||
|                     proximity_precision, |                     proximity_precision, | ||||||
|                     embedder_settings, |                     embedder_settings, | ||||||
|  |                     search_cutoff, | ||||||
|                 } = settings; |                 } = settings; | ||||||
|                 assert!(matches!(searchable_fields, Setting::NotSet)); |                 assert!(matches!(searchable_fields, Setting::NotSet)); | ||||||
|                 assert!(matches!(displayed_fields, Setting::NotSet)); |                 assert!(matches!(displayed_fields, Setting::NotSet)); | ||||||
| @@ -2058,6 +2090,7 @@ mod tests { | |||||||
|                 assert!(matches!(pagination_max_total_hits, Setting::NotSet)); |                 assert!(matches!(pagination_max_total_hits, Setting::NotSet)); | ||||||
|                 assert!(matches!(proximity_precision, Setting::NotSet)); |                 assert!(matches!(proximity_precision, Setting::NotSet)); | ||||||
|                 assert!(matches!(embedder_settings, Setting::NotSet)); |                 assert!(matches!(embedder_settings, Setting::NotSet)); | ||||||
|  |                 assert!(matches!(search_cutoff, Setting::NotSet)); | ||||||
|             }) |             }) | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user