mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-23 03:56:28 +00:00 
			
		
		
		
	Merge #4313
4313: Fix document formatting performance r=Kerollmops a=ManyTheFish Reduce the formatted option list to the attributes that should be formatted, instead of all the attributes to display. The time to compute the `format` list scales with the number of fields to format; combined with `map_leaf_values`, which iterates over all the nested fields, this gives a quadratic complexity: `d*f`, where `d` is the total number of fields to display and `f` is the total number of fields to format. Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -900,6 +900,14 @@ fn format_fields<'a>( | ||||
|     let mut matches_position = compute_matches.then(BTreeMap::new); | ||||
|     let mut document = document.clone(); | ||||
|  | ||||
|     // reduce the formatted option list to the attributes that should be formatted, | ||||
|     // instead of all the attributes to display. | ||||
|     let formatting_fields_options: Vec<_> = formatted_options | ||||
|         .iter() | ||||
|         .filter(|(_, option)| option.should_format()) | ||||
|         .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option)) | ||||
|         .collect(); | ||||
|  | ||||
|     // select the attributes to retrieve | ||||
|     let displayable_names = | ||||
|         displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name")); | ||||
| @@ -908,13 +916,15 @@ fn format_fields<'a>( | ||||
|         // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` | ||||
|         // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only | ||||
|         // highlighted. | ||||
|         let format = formatted_options | ||||
|         // Warn: The time to compute the format list scales with the number of fields to format; | ||||
|         // cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity: | ||||
|         // d*f where d is the total number of fields to display and f is the total number of fields to format. | ||||
|         let format = formatting_fields_options | ||||
|             .iter() | ||||
|             .filter(|(field, _option)| { | ||||
|                 let name = field_ids_map.name(**field).unwrap(); | ||||
|             .filter(|(name, _option)| { | ||||
|                 milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name) | ||||
|             }) | ||||
|             .map(|(_, option)| *option) | ||||
|             .map(|(_, option)| **option) | ||||
|             .reduce(|acc, option| acc.merge(option)); | ||||
|         let mut infos = Vec::new(); | ||||
|  | ||||
| @@ -1011,7 +1021,7 @@ fn format_value<'a>( | ||||
|                     let value = matcher.format(format_options); | ||||
|                     Value::String(value.into_owned()) | ||||
|                 } | ||||
|                 None => Value::Number(number), | ||||
|                 None => Value::String(s), | ||||
|             } | ||||
|         } | ||||
|         value => value, | ||||
|   | ||||
| @@ -15,6 +15,7 @@ pub struct BucketSortOutput { | ||||
|  | ||||
| // TODO: would probably be good to regroup some of these inside of a struct? | ||||
| #[allow(clippy::too_many_arguments)] | ||||
| #[logging_timer::time] | ||||
| pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( | ||||
|     ctx: &mut SearchContext<'ctx>, | ||||
|     mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, | ||||
|   | ||||
| @@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> { | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Copy, Clone, Default)] | ||||
| #[derive(Copy, Clone, Default, Debug)] | ||||
| pub struct FormatOptions { | ||||
|     pub highlight: bool, | ||||
|     pub crop: Option<usize>, | ||||
| @@ -82,6 +82,10 @@ impl FormatOptions { | ||||
|     pub fn merge(self, other: Self) -> Self { | ||||
|         Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) } | ||||
|     } | ||||
|  | ||||
|     pub fn should_format(&self) -> bool { | ||||
|         self.highlight || self.crop.is_some() | ||||
|     } | ||||
| } | ||||
|  | ||||
| #[derive(Clone, Debug)] | ||||
|   | ||||
| @@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph( | ||||
|     Ok(docids) | ||||
| } | ||||
|  | ||||
| #[logging_timer::time] | ||||
| fn resolve_universe( | ||||
|     ctx: &mut SearchContext, | ||||
|     initial_universe: &RoaringBitmap, | ||||
| @@ -556,6 +557,7 @@ pub fn execute_vector_search( | ||||
| } | ||||
|  | ||||
| #[allow(clippy::too_many_arguments)] | ||||
| #[logging_timer::time] | ||||
| pub fn execute_search( | ||||
|     ctx: &mut SearchContext, | ||||
|     query: Option<&str>, | ||||
|   | ||||
| @@ -5,6 +5,7 @@ use super::*; | ||||
| use crate::{Result, SearchContext, MAX_WORD_LENGTH}; | ||||
|  | ||||
| /// Convert the tokenised search query into a list of located query terms. | ||||
| #[logging_timer::time] | ||||
| pub fn located_query_terms_from_tokens( | ||||
|     ctx: &mut SearchContext, | ||||
|     query: NormalizedTokenIter, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user