mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 01:46:28 +00:00 
			
		
		
		
	search: introduce federated search
This commit is contained in:
		@@ -31,6 +31,9 @@ use serde_json::{json, Value};
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
use crate::error::MeilisearchHttpError;
 | 
					use crate::error::MeilisearchHttpError;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mod federated;
 | 
				
			||||||
 | 
					pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 | 
					type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 | 
					pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0;
 | 
				
			||||||
@@ -360,7 +363,7 @@ impl SearchQuery {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// A `SearchQuery` + an index UID.
 | 
					/// A `SearchQuery` + an index UID and optional FederationOptions.
 | 
				
			||||||
// This struct contains the fields of `SearchQuery` inline.
 | 
					// This struct contains the fields of `SearchQuery` inline.
 | 
				
			||||||
// This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`.
 | 
					// This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`.
 | 
				
			||||||
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
 | 
					// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
 | 
				
			||||||
@@ -375,10 +378,10 @@ pub struct SearchQueryWithIndex {
 | 
				
			|||||||
    pub vector: Option<Vec<f32>>,
 | 
					    pub vector: Option<Vec<f32>>,
 | 
				
			||||||
    #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)]
 | 
				
			||||||
    pub hybrid: Option<HybridQuery>,
 | 
					    pub hybrid: Option<HybridQuery>,
 | 
				
			||||||
    #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)]
 | 
				
			||||||
    pub offset: usize,
 | 
					    pub offset: Option<usize>,
 | 
				
			||||||
    #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidSearchLimit>)]
 | 
				
			||||||
    pub limit: usize,
 | 
					    pub limit: Option<usize>,
 | 
				
			||||||
    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
 | 
				
			||||||
    pub page: Option<usize>,
 | 
					    pub page: Option<usize>,
 | 
				
			||||||
    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
 | 
				
			||||||
@@ -419,12 +422,33 @@ pub struct SearchQueryWithIndex {
 | 
				
			|||||||
    pub attributes_to_search_on: Option<Vec<String>>,
 | 
					    pub attributes_to_search_on: Option<Vec<String>>,
 | 
				
			||||||
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
 | 
				
			||||||
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
 | 
					    pub ranking_score_threshold: Option<RankingScoreThreshold>,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #[deserr(default)]
 | 
				
			||||||
 | 
					    pub federation_options: Option<FederationOptions>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl SearchQueryWithIndex {
 | 
					impl SearchQueryWithIndex {
 | 
				
			||||||
    pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
 | 
					    pub fn has_federation_options(&self) -> bool {
 | 
				
			||||||
 | 
					        self.federation_options.is_some()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    pub fn has_pagination(&self) -> Option<&'static str> {
 | 
				
			||||||
 | 
					        if self.offset.is_some() {
 | 
				
			||||||
 | 
					            Some("offset")
 | 
				
			||||||
 | 
					        } else if self.limit.is_some() {
 | 
				
			||||||
 | 
					            Some("limit")
 | 
				
			||||||
 | 
					        } else if self.page.is_some() {
 | 
				
			||||||
 | 
					            Some("page")
 | 
				
			||||||
 | 
					        } else if self.hits_per_page.is_some() {
 | 
				
			||||||
 | 
					            Some("hitsPerPage")
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            None
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) {
 | 
				
			||||||
        let SearchQueryWithIndex {
 | 
					        let SearchQueryWithIndex {
 | 
				
			||||||
            index_uid,
 | 
					            index_uid,
 | 
				
			||||||
 | 
					            federation_options,
 | 
				
			||||||
            q,
 | 
					            q,
 | 
				
			||||||
            vector,
 | 
					            vector,
 | 
				
			||||||
            offset,
 | 
					            offset,
 | 
				
			||||||
@@ -456,8 +480,8 @@ impl SearchQueryWithIndex {
 | 
				
			|||||||
            SearchQuery {
 | 
					            SearchQuery {
 | 
				
			||||||
                q,
 | 
					                q,
 | 
				
			||||||
                vector,
 | 
					                vector,
 | 
				
			||||||
                offset,
 | 
					                offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()),
 | 
				
			||||||
                limit,
 | 
					                limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()),
 | 
				
			||||||
                page,
 | 
					                page,
 | 
				
			||||||
                hits_per_page,
 | 
					                hits_per_page,
 | 
				
			||||||
                attributes_to_retrieve,
 | 
					                attributes_to_retrieve,
 | 
				
			||||||
@@ -482,6 +506,7 @@ impl SearchQueryWithIndex {
 | 
				
			|||||||
                // do not use ..Default::default() here,
 | 
					                // do not use ..Default::default() here,
 | 
				
			||||||
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
 | 
					                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
 | 
				
			||||||
            },
 | 
					            },
 | 
				
			||||||
 | 
					            federation_options,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 
 | 
				
			|||||||
							
								
								
									
										633
									
								
								meilisearch/src/search/federated.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										633
									
								
								meilisearch/src/search/federated.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,633 @@
 | 
				
			|||||||
 | 
					use std::cmp::Ordering;
 | 
				
			||||||
 | 
					use std::collections::BTreeMap;
 | 
				
			||||||
 | 
					use std::fmt;
 | 
				
			||||||
 | 
					use std::iter::Zip;
 | 
				
			||||||
 | 
					use std::rc::Rc;
 | 
				
			||||||
 | 
					use std::str::FromStr as _;
 | 
				
			||||||
 | 
					use std::time::Duration;
 | 
				
			||||||
 | 
					use std::vec::{IntoIter, Vec};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use actix_http::StatusCode;
 | 
				
			||||||
 | 
					use index_scheduler::{IndexScheduler, RoFeatures};
 | 
				
			||||||
 | 
					use meilisearch_types::deserr::DeserrJsonError;
 | 
				
			||||||
 | 
					use meilisearch_types::error::deserr_codes::{
 | 
				
			||||||
 | 
					    InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					use meilisearch_types::error::ResponseError;
 | 
				
			||||||
 | 
					use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue};
 | 
				
			||||||
 | 
					use meilisearch_types::milli::{self, DocumentId, TimeBudget};
 | 
				
			||||||
 | 
					use roaring::RoaringBitmap;
 | 
				
			||||||
 | 
					use serde::Serialize;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use self::ranking_rules::RankingRules;
 | 
				
			||||||
 | 
					use super::{
 | 
				
			||||||
 | 
					    prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind,
 | 
				
			||||||
 | 
					    SearchQuery, SearchQueryWithIndex,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					use crate::error::MeilisearchHttpError;
 | 
				
			||||||
 | 
					use crate::routes::indexes::search::search_kind;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					mod ranking_rules;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Weight given to a federated query that does not specify one: `1.0`.
					pub const DEFAULT_FEDERATED_WEIGHT: fn() -> f64 = || 1.0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)]
 | 
				
			||||||
 | 
					#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 | 
				
			||||||
 | 
					pub struct FederationOptions {
 | 
				
			||||||
 | 
					    #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)]
 | 
				
			||||||
 | 
					    pub weight: Weight,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
 | 
				
			||||||
 | 
					#[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)]
 | 
				
			||||||
 | 
					pub struct Weight(f64);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl Default for Weight {
 | 
				
			||||||
 | 
					    fn default() -> Self {
 | 
				
			||||||
 | 
					        Weight(DEFAULT_FEDERATED_WEIGHT())
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl std::convert::TryFrom<f64> for Weight {
 | 
				
			||||||
 | 
					    type Error = InvalidMultiSearchWeight;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn try_from(f: f64) -> Result<Self, Self::Error> {
 | 
				
			||||||
 | 
					        // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
 | 
				
			||||||
 | 
					        #[allow(clippy::manual_range_contains)]
 | 
				
			||||||
 | 
					        if f < 0.0 {
 | 
				
			||||||
 | 
					            Err(InvalidMultiSearchWeight)
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            Ok(Weight(f))
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl std::ops::Deref for Weight {
 | 
				
			||||||
 | 
					    type Target = f64;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn deref(&self) -> &Self::Target {
 | 
				
			||||||
 | 
					        &self.0
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug, deserr::Deserr)]
 | 
				
			||||||
 | 
					#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 | 
				
			||||||
 | 
					pub struct Federation {
 | 
				
			||||||
 | 
					    #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
 | 
				
			||||||
 | 
					    pub limit: usize,
 | 
				
			||||||
 | 
					    #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
 | 
				
			||||||
 | 
					    pub offset: usize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug, deserr::Deserr)]
 | 
				
			||||||
 | 
					#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
 | 
				
			||||||
 | 
					pub struct FederatedSearch {
 | 
				
			||||||
 | 
					    pub queries: Vec<SearchQueryWithIndex>,
 | 
				
			||||||
 | 
					    #[deserr(default)]
 | 
				
			||||||
 | 
					    pub federation: Option<Federation>,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					#[derive(Serialize, Clone, PartialEq)]
 | 
				
			||||||
 | 
					#[serde(rename_all = "camelCase")]
 | 
				
			||||||
 | 
					pub struct FederatedSearchResult {
 | 
				
			||||||
 | 
					    pub hits: Vec<SearchHit>,
 | 
				
			||||||
 | 
					    pub processing_time_ms: u128,
 | 
				
			||||||
 | 
					    #[serde(flatten)]
 | 
				
			||||||
 | 
					    pub hits_info: HitsInfo,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #[serde(skip_serializing_if = "Option::is_none")]
 | 
				
			||||||
 | 
					    pub semantic_hit_count: Option<u32>,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // These fields are only used for analytics purposes
 | 
				
			||||||
 | 
					    #[serde(skip)]
 | 
				
			||||||
 | 
					    pub degraded: bool,
 | 
				
			||||||
 | 
					    #[serde(skip)]
 | 
				
			||||||
 | 
					    pub used_negative_operator: bool,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl fmt::Debug for FederatedSearchResult {
 | 
				
			||||||
 | 
					    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
 | 
				
			||||||
 | 
					        let FederatedSearchResult {
 | 
				
			||||||
 | 
					            hits,
 | 
				
			||||||
 | 
					            processing_time_ms,
 | 
				
			||||||
 | 
					            hits_info,
 | 
				
			||||||
 | 
					            semantic_hit_count,
 | 
				
			||||||
 | 
					            degraded,
 | 
				
			||||||
 | 
					            used_negative_operator,
 | 
				
			||||||
 | 
					        } = self;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut debug = f.debug_struct("SearchResult");
 | 
				
			||||||
 | 
					        // The most important thing when looking at a search result is the time it took to process
 | 
				
			||||||
 | 
					        debug.field("processing_time_ms", &processing_time_ms);
 | 
				
			||||||
 | 
					        debug.field("hits", &format!("[{} hits returned]", hits.len()));
 | 
				
			||||||
 | 
					        debug.field("hits_info", &hits_info);
 | 
				
			||||||
 | 
					        if *used_negative_operator {
 | 
				
			||||||
 | 
					            debug.field("used_negative_operator", used_negative_operator);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if *degraded {
 | 
				
			||||||
 | 
					            debug.field("degraded", degraded);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if let Some(semantic_hit_count) = semantic_hit_count {
 | 
				
			||||||
 | 
					            debug.field("semantic_hit_count", &semantic_hit_count);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        debug.finish()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct WeightedScore<'a> {
 | 
				
			||||||
 | 
					    details: &'a [ScoreDetails],
 | 
				
			||||||
 | 
					    weight: f64,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl<'a> WeightedScore<'a> {
 | 
				
			||||||
 | 
					    pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self {
 | 
				
			||||||
 | 
					        Self { details, weight }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn weighted_global_score(&self) -> f64 {
 | 
				
			||||||
 | 
					        ScoreDetails::global_score(self.details.iter()) * self.weight
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering {
 | 
				
			||||||
 | 
					        self.weighted_global_score()
 | 
				
			||||||
 | 
					            .partial_cmp(&other.weighted_global_score())
 | 
				
			||||||
 | 
					            // both are numbers, possibly infinite
 | 
				
			||||||
 | 
					            .unwrap()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn compare(&self, other: &Self) -> Ordering {
 | 
				
			||||||
 | 
					        let mut left_it = ScoreDetails::score_values(self.details.iter());
 | 
				
			||||||
 | 
					        let mut right_it = ScoreDetails::score_values(other.details.iter());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        loop {
 | 
				
			||||||
 | 
					            let left = left_it.next();
 | 
				
			||||||
 | 
					            let right = right_it.next();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            match (left, right) {
 | 
				
			||||||
 | 
					                (None, None) => return Ordering::Equal,
 | 
				
			||||||
 | 
					                (None, Some(_)) => return Ordering::Less,
 | 
				
			||||||
 | 
					                (Some(_), None) => return Ordering::Greater,
 | 
				
			||||||
 | 
					                (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
 | 
				
			||||||
 | 
					                    let left = left * self.weight;
 | 
				
			||||||
 | 
					                    let right = right * other.weight;
 | 
				
			||||||
 | 
					                    if (left - right).abs() <= f64::EPSILON {
 | 
				
			||||||
 | 
					                        continue;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    return left.partial_cmp(&right).unwrap();
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
 | 
				
			||||||
 | 
					                    match left.partial_cmp(right) {
 | 
				
			||||||
 | 
					                        Some(Ordering::Equal) => continue,
 | 
				
			||||||
 | 
					                        Some(order) => return order,
 | 
				
			||||||
 | 
					                        None => return self.compare_weighted_global_scores(other),
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
 | 
				
			||||||
 | 
					                    match left.partial_cmp(right) {
 | 
				
			||||||
 | 
					                        Some(Ordering::Equal) => continue,
 | 
				
			||||||
 | 
					                        Some(order) => return order,
 | 
				
			||||||
 | 
					                        None => {
 | 
				
			||||||
 | 
					                            return self.compare_weighted_global_scores(other);
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                // not comparable details, use global
 | 
				
			||||||
 | 
					                (Some(ScoreValue::Score(_)), Some(_))
 | 
				
			||||||
 | 
					                | (Some(_), Some(ScoreValue::Score(_)))
 | 
				
			||||||
 | 
					                | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
 | 
				
			||||||
 | 
					                | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
 | 
				
			||||||
 | 
					                    let left_count = left_it.count();
 | 
				
			||||||
 | 
					                    let right_count = right_it.count();
 | 
				
			||||||
 | 
					                    // compare how many remaining groups of rules each side has.
 | 
				
			||||||
 | 
					                    // the group with the most remaining groups wins.
 | 
				
			||||||
 | 
					                    return left_count
 | 
				
			||||||
 | 
					                        .cmp(&right_count)
 | 
				
			||||||
 | 
					                        // breaks ties with the global ranking score
 | 
				
			||||||
 | 
					                        .then_with(|| self.compare_weighted_global_scores(other));
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct QueryByIndex {
 | 
				
			||||||
 | 
					    query: SearchQuery,
 | 
				
			||||||
 | 
					    federation_options: FederationOptions,
 | 
				
			||||||
 | 
					    query_index: usize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct SearchResultByQuery<'a> {
 | 
				
			||||||
 | 
					    documents_ids: Vec<DocumentId>,
 | 
				
			||||||
 | 
					    document_scores: Vec<Vec<ScoreDetails>>,
 | 
				
			||||||
 | 
					    federation_options: FederationOptions,
 | 
				
			||||||
 | 
					    hit_maker: HitMaker<'a>,
 | 
				
			||||||
 | 
					    query_index: usize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct SearchResultByQueryIter<'a> {
 | 
				
			||||||
 | 
					    it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
 | 
				
			||||||
 | 
					    federation_options: FederationOptions,
 | 
				
			||||||
 | 
					    hit_maker: Rc<HitMaker<'a>>,
 | 
				
			||||||
 | 
					    query_index: usize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl<'a> SearchResultByQueryIter<'a> {
 | 
				
			||||||
 | 
					    fn new(
 | 
				
			||||||
 | 
					        SearchResultByQuery {
 | 
				
			||||||
 | 
					            documents_ids,
 | 
				
			||||||
 | 
					            document_scores,
 | 
				
			||||||
 | 
					            federation_options,
 | 
				
			||||||
 | 
					            hit_maker,
 | 
				
			||||||
 | 
					            query_index,
 | 
				
			||||||
 | 
					        }: SearchResultByQuery<'a>,
 | 
				
			||||||
 | 
					    ) -> Self {
 | 
				
			||||||
 | 
					        let it = documents_ids.into_iter().zip(document_scores);
 | 
				
			||||||
 | 
					        Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct SearchResultByQueryIterItem<'a> {
 | 
				
			||||||
 | 
					    docid: DocumentId,
 | 
				
			||||||
 | 
					    score: Vec<ScoreDetails>,
 | 
				
			||||||
 | 
					    federation_options: FederationOptions,
 | 
				
			||||||
 | 
					    hit_maker: Rc<HitMaker<'a>>,
 | 
				
			||||||
 | 
					    query_index: usize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn merge_index_local_results(
 | 
				
			||||||
 | 
					    results_by_query: Vec<SearchResultByQuery<'_>>,
 | 
				
			||||||
 | 
					) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ {
 | 
				
			||||||
 | 
					    itertools::kmerge_by(
 | 
				
			||||||
 | 
					        results_by_query.into_iter().map(SearchResultByQueryIter::new),
 | 
				
			||||||
 | 
					        |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| {
 | 
				
			||||||
 | 
					            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
 | 
				
			||||||
 | 
					            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            match left_score.compare(&right_score) {
 | 
				
			||||||
 | 
					                // the biggest score goes first
 | 
				
			||||||
 | 
					                Ordering::Greater => true,
 | 
				
			||||||
 | 
					                // break ties using query index
 | 
				
			||||||
 | 
					                Ordering::Equal => left.query_index < right.query_index,
 | 
				
			||||||
 | 
					                Ordering::Less => false,
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn merge_index_global_results(
 | 
				
			||||||
 | 
					    results_by_index: Vec<SearchResultByIndex>,
 | 
				
			||||||
 | 
					) -> impl Iterator<Item = SearchHitByIndex> {
 | 
				
			||||||
 | 
					    itertools::kmerge_by(
 | 
				
			||||||
 | 
					        results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()),
 | 
				
			||||||
 | 
					        |left: &SearchHitByIndex, right: &SearchHitByIndex| {
 | 
				
			||||||
 | 
					            let left_score = WeightedScore::new(&left.score, *left.federation_options.weight);
 | 
				
			||||||
 | 
					            let right_score = WeightedScore::new(&right.score, *right.federation_options.weight);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            match left_score.compare(&right_score) {
 | 
				
			||||||
 | 
					                // the biggest score goes first
 | 
				
			||||||
 | 
					                Ordering::Greater => true,
 | 
				
			||||||
 | 
					                // break ties using query index
 | 
				
			||||||
 | 
					                Ordering::Equal => left.query_index < right.query_index,
 | 
				
			||||||
 | 
					                Ordering::Less => false,
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl<'a> Iterator for SearchResultByQueryIter<'a> {
 | 
				
			||||||
 | 
					    type Item = SearchResultByQueryIterItem<'a>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn next(&mut self) -> Option<Self::Item> {
 | 
				
			||||||
 | 
					        let (docid, score) = self.it.next()?;
 | 
				
			||||||
 | 
					        Some(SearchResultByQueryIterItem {
 | 
				
			||||||
 | 
					            docid,
 | 
				
			||||||
 | 
					            score,
 | 
				
			||||||
 | 
					            federation_options: self.federation_options,
 | 
				
			||||||
 | 
					            hit_maker: Rc::clone(&self.hit_maker),
 | 
				
			||||||
 | 
					            query_index: self.query_index,
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct SearchHitByIndex {
 | 
				
			||||||
 | 
					    hit: SearchHit,
 | 
				
			||||||
 | 
					    score: Vec<ScoreDetails>,
 | 
				
			||||||
 | 
					    federation_options: FederationOptions,
 | 
				
			||||||
 | 
					    query_index: usize,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct SearchResultByIndex {
 | 
				
			||||||
 | 
					    hits: Vec<SearchHitByIndex>,
 | 
				
			||||||
 | 
					    candidates: RoaringBitmap,
 | 
				
			||||||
 | 
					    degraded: bool,
 | 
				
			||||||
 | 
					    used_negative_operator: bool,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn perform_federated_search(
 | 
				
			||||||
 | 
					    index_scheduler: &IndexScheduler,
 | 
				
			||||||
 | 
					    queries: Vec<SearchQueryWithIndex>,
 | 
				
			||||||
 | 
					    federation: Federation,
 | 
				
			||||||
 | 
					    features: RoFeatures,
 | 
				
			||||||
 | 
					) -> Result<FederatedSearchResult, ResponseError> {
 | 
				
			||||||
 | 
					    let before_search = std::time::Instant::now();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // this implementation partition the queries by index to guarantee an important property:
 | 
				
			||||||
 | 
					    // - all the queries to a particular index use the same read transaction.
 | 
				
			||||||
 | 
					    // This is an important property, otherwise we cannot guarantee the self-consistency of the results.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // 1. partition queries by index
 | 
				
			||||||
 | 
					    let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default();
 | 
				
			||||||
 | 
					    for (query_index, federated_query) in queries.into_iter().enumerate() {
 | 
				
			||||||
 | 
					        if let Some(pagination_field) = federated_query.has_pagination() {
 | 
				
			||||||
 | 
					            return Err(MeilisearchHttpError::PaginationInFederatedQuery(
 | 
				
			||||||
 | 
					                query_index,
 | 
				
			||||||
 | 
					                pagination_field,
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					            .into());
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let (index_uid, query, federation_options) = federated_query.into_index_query_federation();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex {
 | 
				
			||||||
 | 
					            query,
 | 
				
			||||||
 | 
					            federation_options: federation_options.unwrap_or_default(),
 | 
				
			||||||
 | 
					            query_index,
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // 2. perform queries, merge and make hits index by index
 | 
				
			||||||
 | 
					    let required_hit_count = federation.limit + federation.offset;
 | 
				
			||||||
 | 
					    // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic
 | 
				
			||||||
 | 
					    // Then in step (3), we'll update its value if there is any semantic search
 | 
				
			||||||
 | 
					    let mut semantic_hit_count = None;
 | 
				
			||||||
 | 
					    let mut results_by_index = Vec::with_capacity(queries_by_index.len());
 | 
				
			||||||
 | 
					    let mut previous_query_data: Option<(RankingRules, usize, String)> = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (index_uid, queries) in queries_by_index {
 | 
				
			||||||
 | 
					        let index = match index_scheduler.index(&index_uid) {
 | 
				
			||||||
 | 
					            Ok(index) => index,
 | 
				
			||||||
 | 
					            Err(err) => {
 | 
				
			||||||
 | 
					                let mut err = ResponseError::from(err);
 | 
				
			||||||
 | 
					                // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but
 | 
				
			||||||
 | 
					                // here the resource not found is not part of the URL.
 | 
				
			||||||
 | 
					                err.code = StatusCode::BAD_REQUEST;
 | 
				
			||||||
 | 
					                if let Some(query) = queries.first() {
 | 
				
			||||||
 | 
					                    err.message =
 | 
				
			||||||
 | 
					                        format!("Inside `.queries[{}]`: {}", query.query_index, err.message);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                return Err(err);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Important: this is the only transaction we'll use for this index during this federated search
 | 
				
			||||||
 | 
					        let rtxn = index.read_txn()?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let criteria = index.criteria(&rtxn)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // stuff we need for the hitmaker
 | 
				
			||||||
 | 
					        let script_lang_map = index.script_language(&rtxn)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let dictionary = index.dictionary(&rtxn)?;
 | 
				
			||||||
 | 
					        let dictionary: Option<Vec<_>> =
 | 
				
			||||||
 | 
					            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
 | 
				
			||||||
 | 
					        let separators = index.allowed_separators(&rtxn)?;
 | 
				
			||||||
 | 
					        let separators: Option<Vec<_>> =
 | 
				
			||||||
 | 
					            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // each query gets its individual cutoff
 | 
				
			||||||
 | 
					        let cutoff = index.search_cutoff(&rtxn)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut degraded = false;
 | 
				
			||||||
 | 
					        let mut used_negative_operator = false;
 | 
				
			||||||
 | 
					        let mut candidates = RoaringBitmap::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // 2.1. Compute all candidates for each query in the index
 | 
				
			||||||
 | 
					        let mut results_by_query = Vec::with_capacity(queries.len());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for QueryByIndex { query, federation_options, query_index } in queries {
 | 
				
			||||||
 | 
					            // use an immediately invoked lambda to capture the result without returning from the function
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            let res: Result<(), ResponseError> = (|| {
 | 
				
			||||||
 | 
					                let search_kind = search_kind(&query, index_scheduler, &index, features)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let canonicalization_kind = match (&search_kind, &query.q) {
 | 
				
			||||||
 | 
					                    (SearchKind::SemanticOnly { .. }, _) => {
 | 
				
			||||||
 | 
					                        ranking_rules::CanonicalizationKind::Vector
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword,
 | 
				
			||||||
 | 
					                    _ => ranking_rules::CanonicalizationKind::Placeholder,
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let sort = if let Some(sort) = &query.sort {
 | 
				
			||||||
 | 
					                    let sorts: Vec<_> =
 | 
				
			||||||
 | 
					                        match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() {
 | 
				
			||||||
 | 
					                            Ok(sorts) => sorts,
 | 
				
			||||||
 | 
					                            Err(asc_desc_error) => {
 | 
				
			||||||
 | 
					                                return Err(milli::Error::from(milli::SortError::from(
 | 
				
			||||||
 | 
					                                    asc_desc_error,
 | 
				
			||||||
 | 
					                                ))
 | 
				
			||||||
 | 
					                                .into())
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        };
 | 
				
			||||||
 | 
					                    Some(sorts)
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    None
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let ranking_rules = ranking_rules::RankingRules::new(
 | 
				
			||||||
 | 
					                    criteria.clone(),
 | 
				
			||||||
 | 
					                    sort,
 | 
				
			||||||
 | 
					                    query.matching_strategy.into(),
 | 
				
			||||||
 | 
					                    canonicalization_kind,
 | 
				
			||||||
 | 
					                );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) =
 | 
				
			||||||
 | 
					                    previous_query_data.take()
 | 
				
			||||||
 | 
					                {
 | 
				
			||||||
 | 
					                    if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) {
 | 
				
			||||||
 | 
					                        return Err(error.to_response_error(
 | 
				
			||||||
 | 
					                            &ranking_rules,
 | 
				
			||||||
 | 
					                            &previous_ranking_rules,
 | 
				
			||||||
 | 
					                            query_index,
 | 
				
			||||||
 | 
					                            previous_query_index,
 | 
				
			||||||
 | 
					                            &index_uid,
 | 
				
			||||||
 | 
					                            &previous_index_uid,
 | 
				
			||||||
 | 
					                        ));
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    previous_query_data = if previous_ranking_rules.constraint_count()
 | 
				
			||||||
 | 
					                        > ranking_rules.constraint_count()
 | 
				
			||||||
 | 
					                    {
 | 
				
			||||||
 | 
					                        Some((previous_ranking_rules, previous_query_index, previous_index_uid))
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        Some((ranking_rules, query_index, index_uid.clone()))
 | 
				
			||||||
 | 
					                    };
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    previous_query_data = Some((ranking_rules, query_index, index_uid.clone()));
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                match search_kind {
 | 
				
			||||||
 | 
					                    SearchKind::KeywordOnly => {}
 | 
				
			||||||
 | 
					                    _ => semantic_hit_count = Some(0),
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let time_budget = match cutoff {
 | 
				
			||||||
 | 
					                    Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)),
 | 
				
			||||||
 | 
					                    None => TimeBudget::default(),
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
 | 
				
			||||||
 | 
					                    prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
 | 
				
			||||||
 | 
					                search.offset(0);
 | 
				
			||||||
 | 
					                search.limit(required_hit_count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?;
 | 
				
			||||||
 | 
					                let format = AttributesFormat {
 | 
				
			||||||
 | 
					                    attributes_to_retrieve: query.attributes_to_retrieve,
 | 
				
			||||||
 | 
					                    retrieve_vectors,
 | 
				
			||||||
 | 
					                    attributes_to_highlight: query.attributes_to_highlight,
 | 
				
			||||||
 | 
					                    attributes_to_crop: query.attributes_to_crop,
 | 
				
			||||||
 | 
					                    crop_length: query.crop_length,
 | 
				
			||||||
 | 
					                    crop_marker: query.crop_marker,
 | 
				
			||||||
 | 
					                    highlight_pre_tag: query.highlight_pre_tag,
 | 
				
			||||||
 | 
					                    highlight_post_tag: query.highlight_post_tag,
 | 
				
			||||||
 | 
					                    show_matches_position: query.show_matches_position,
 | 
				
			||||||
 | 
					                    sort: query.sort,
 | 
				
			||||||
 | 
					                    show_ranking_score: query.show_ranking_score,
 | 
				
			||||||
 | 
					                    show_ranking_score_details: query.show_ranking_score_details,
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let milli::SearchResult {
 | 
				
			||||||
 | 
					                    matching_words,
 | 
				
			||||||
 | 
					                    candidates: query_candidates,
 | 
				
			||||||
 | 
					                    documents_ids,
 | 
				
			||||||
 | 
					                    document_scores,
 | 
				
			||||||
 | 
					                    degraded: query_degraded,
 | 
				
			||||||
 | 
					                    used_negative_operator: query_used_negative_operator,
 | 
				
			||||||
 | 
					                } = result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                candidates |= query_candidates;
 | 
				
			||||||
 | 
					                degraded |= query_degraded;
 | 
				
			||||||
 | 
					                used_negative_operator |= query_used_negative_operator;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let tokenizer = HitMaker::tokenizer(
 | 
				
			||||||
 | 
					                    &script_lang_map,
 | 
				
			||||||
 | 
					                    dictionary.as_deref(),
 | 
				
			||||||
 | 
					                    separators.as_deref(),
 | 
				
			||||||
 | 
					                );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                results_by_query.push(SearchResultByQuery {
 | 
				
			||||||
 | 
					                    federation_options,
 | 
				
			||||||
 | 
					                    hit_maker,
 | 
				
			||||||
 | 
					                    query_index,
 | 
				
			||||||
 | 
					                    documents_ids,
 | 
				
			||||||
 | 
					                    document_scores,
 | 
				
			||||||
 | 
					                });
 | 
				
			||||||
 | 
					                Ok(())
 | 
				
			||||||
 | 
					            })();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if let Err(mut error) = res {
 | 
				
			||||||
 | 
					                error.message = format!("Inside `.queries[{query_index}]`: {}", error.message);
 | 
				
			||||||
 | 
					                return Err(error);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        // 2.2. merge inside index
 | 
				
			||||||
 | 
					        let mut documents_seen = RoaringBitmap::new();
 | 
				
			||||||
 | 
					        let merged_result: Result<Vec<_>, ResponseError> =
 | 
				
			||||||
 | 
					            merge_index_local_results(results_by_query)
 | 
				
			||||||
 | 
					                // skip documents we've already seen & mark that we saw the current document
 | 
				
			||||||
 | 
					                .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid))
 | 
				
			||||||
 | 
					                .take(required_hit_count)
 | 
				
			||||||
 | 
					                // 2.3 make hits
 | 
				
			||||||
 | 
					                .map(
 | 
				
			||||||
 | 
					                    |SearchResultByQueryIterItem {
 | 
				
			||||||
 | 
					                         docid,
 | 
				
			||||||
 | 
					                         score,
 | 
				
			||||||
 | 
					                         federation_options,
 | 
				
			||||||
 | 
					                         hit_maker,
 | 
				
			||||||
 | 
					                         query_index,
 | 
				
			||||||
 | 
					                     }| {
 | 
				
			||||||
 | 
					                        let mut hit = hit_maker.make_hit(docid, &score)?;
 | 
				
			||||||
 | 
					                        let weighted_score =
 | 
				
			||||||
 | 
					                            ScoreDetails::global_score(score.iter()) * (*federation_options.weight);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                        let _federation = serde_json::json!(
 | 
				
			||||||
 | 
					                            {
 | 
				
			||||||
 | 
					                                "indexUid": index_uid,
 | 
				
			||||||
 | 
					                                "queriesPosition": query_index,
 | 
				
			||||||
 | 
					                                "weightedRankingScore": weighted_score,
 | 
				
			||||||
 | 
					                            }
 | 
				
			||||||
 | 
					                        );
 | 
				
			||||||
 | 
					                        hit.document.insert("_federation".to_string(), _federation);
 | 
				
			||||||
 | 
					                        Ok(SearchHitByIndex { hit, score, federation_options, query_index })
 | 
				
			||||||
 | 
					                    },
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					                .collect();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let merged_result = merged_result?;
 | 
				
			||||||
 | 
					        results_by_index.push(SearchResultByIndex {
 | 
				
			||||||
 | 
					            hits: merged_result,
 | 
				
			||||||
 | 
					            candidates,
 | 
				
			||||||
 | 
					            degraded,
 | 
				
			||||||
 | 
					            used_negative_operator,
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // 3. merge hits and metadata across indexes
 | 
				
			||||||
 | 
					    // 3.1 merge metadata
 | 
				
			||||||
 | 
					    let (estimated_total_hits, degraded, used_negative_operator) = {
 | 
				
			||||||
 | 
					        let mut estimated_total_hits = 0;
 | 
				
			||||||
 | 
					        let mut degraded = false;
 | 
				
			||||||
 | 
					        let mut used_negative_operator = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for SearchResultByIndex {
 | 
				
			||||||
 | 
					            hits: _,
 | 
				
			||||||
 | 
					            candidates,
 | 
				
			||||||
 | 
					            degraded: degraded_by_index,
 | 
				
			||||||
 | 
					            used_negative_operator: used_negative_operator_by_index,
 | 
				
			||||||
 | 
					        } in &results_by_index
 | 
				
			||||||
 | 
					        {
 | 
				
			||||||
 | 
					            estimated_total_hits += candidates.len() as usize;
 | 
				
			||||||
 | 
					            degraded |= *degraded_by_index;
 | 
				
			||||||
 | 
					            used_negative_operator |= *used_negative_operator_by_index;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        (estimated_total_hits, degraded, used_negative_operator)
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // 3.2 merge hits
 | 
				
			||||||
 | 
					    let merged_hits: Vec<_> = merge_index_global_results(results_by_index)
 | 
				
			||||||
 | 
					        .skip(federation.offset)
 | 
				
			||||||
 | 
					        .take(federation.limit)
 | 
				
			||||||
 | 
					        .inspect(|hit| {
 | 
				
			||||||
 | 
					            if let Some(semantic_hit_count) = &mut semantic_hit_count {
 | 
				
			||||||
 | 
					                if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) {
 | 
				
			||||||
 | 
					                    *semantic_hit_count += 1;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					        .map(|hit| hit.hit)
 | 
				
			||||||
 | 
					        .collect();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    let search_result = FederatedSearchResult {
 | 
				
			||||||
 | 
					        hits: merged_hits,
 | 
				
			||||||
 | 
					        processing_time_ms: before_search.elapsed().as_millis(),
 | 
				
			||||||
 | 
					        hits_info: HitsInfo::OffsetLimit {
 | 
				
			||||||
 | 
					            limit: federation.limit,
 | 
				
			||||||
 | 
					            offset: federation.offset,
 | 
				
			||||||
 | 
					            estimated_total_hits,
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        semantic_hit_count,
 | 
				
			||||||
 | 
					        degraded,
 | 
				
			||||||
 | 
					        used_negative_operator,
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Ok(search_result)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										823
									
								
								meilisearch/src/search/federated/ranking_rules.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										823
									
								
								meilisearch/src/search/federated/ranking_rules.rs
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,823 @@
 | 
				
			|||||||
 | 
					use std::collections::HashMap;
 | 
				
			||||||
 | 
					use std::fmt::Write;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use itertools::Itertools as _;
 | 
				
			||||||
 | 
					use meilisearch_types::error::{Code, ResponseError};
 | 
				
			||||||
 | 
					use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub struct RankingRules {
 | 
				
			||||||
 | 
					    canonical_criteria: Vec<Criterion>,
 | 
				
			||||||
 | 
					    canonical_sort: Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					    canonicalization_actions: Vec<CanonicalizationAction>,
 | 
				
			||||||
 | 
					    source_criteria: Vec<Criterion>,
 | 
				
			||||||
 | 
					    source_sort: Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub enum CanonicalizationAction {
 | 
				
			||||||
 | 
					    PrependedWords {
 | 
				
			||||||
 | 
					        prepended_index: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    RemovedDuplicate {
 | 
				
			||||||
 | 
					        earlier_occurrence: RankingRuleSource,
 | 
				
			||||||
 | 
					        removed_occurrence: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    RemovedWords {
 | 
				
			||||||
 | 
					        reason: RemoveWords,
 | 
				
			||||||
 | 
					        removed_occurrence: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    RemovedPlaceholder {
 | 
				
			||||||
 | 
					        removed_occurrence: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    TruncatedVector {
 | 
				
			||||||
 | 
					        vector_rule: RankingRuleSource,
 | 
				
			||||||
 | 
					        truncated_from: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    RemovedVector {
 | 
				
			||||||
 | 
					        vector_rule: RankingRuleSource,
 | 
				
			||||||
 | 
					        removed_occurrence: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					    RemovedSort {
 | 
				
			||||||
 | 
					        removed_occurrence: RankingRuleSource,
 | 
				
			||||||
 | 
					    },
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// The reason attached to `CanonicalizationAction::RemovedWords`.
					///
					/// Its `Display` implementation renders the reason as a sentence fragment meant to
					/// be embedded in a user-facing note.
					#[derive(Debug, Clone, Copy, PartialEq, Eq)]
					pub enum RemoveWords {
					    /// Rendered as "it was previously prepended".
					    WasPrepended,
					    /// Rendered as a reference to the query's `matchingStrategy` being `all`.
					    MatchingStrategyAll,
					}

					impl std::fmt::Display for RemoveWords {
					    /// Writes the human-readable reason, without trailing punctuation.
					    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
					        f.write_str(match self {
					            RemoveWords::WasPrepended => "it was previously prepended",
					            // The search parameter is called `matchingStrategy`; there is no
					            // `matchingWords` parameter a user could look up.
					            RemoveWords::MatchingStrategyAll => "`query.matchingStrategy` is set to `all`",
					        })
					    }
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Selects which canonicalization is applied to a query's ranking rules.
					///
					/// The federated search picks `Vector` for semantic-only searches, `Keyword` when the
					/// query has a non-empty `q`, and `Placeholder` otherwise.
					#[derive(Debug, Clone, Copy, PartialEq, Eq)]
					pub enum CanonicalizationKind {
					    /// Canonicalize for a search without query text.
					    Placeholder,
					    /// Canonicalize for a search with query text.
					    Keyword,
					    /// Canonicalize for a semantic-only search.
					    Vector,
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub struct CompatibilityError {
 | 
				
			||||||
 | 
					    previous: RankingRule,
 | 
				
			||||||
 | 
					    current: RankingRule,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					impl CompatibilityError {
 | 
				
			||||||
 | 
					    pub(crate) fn to_response_error(
 | 
				
			||||||
 | 
					        &self,
 | 
				
			||||||
 | 
					        ranking_rules: &RankingRules,
 | 
				
			||||||
 | 
					        previous_ranking_rules: &RankingRules,
 | 
				
			||||||
 | 
					        query_index: usize,
 | 
				
			||||||
 | 
					        previous_query_index: usize,
 | 
				
			||||||
 | 
					        index_uid: &str,
 | 
				
			||||||
 | 
					        previous_index_uid: &str,
 | 
				
			||||||
 | 
					    ) -> meilisearch_types::error::ResponseError {
 | 
				
			||||||
 | 
					        let rule = self.current.as_string(
 | 
				
			||||||
 | 
					            &ranking_rules.canonical_criteria,
 | 
				
			||||||
 | 
					            &ranking_rules.canonical_sort,
 | 
				
			||||||
 | 
					            query_index,
 | 
				
			||||||
 | 
					            index_uid,
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					        let previous_rule = self.previous.as_string(
 | 
				
			||||||
 | 
					            &previous_ranking_rules.canonical_criteria,
 | 
				
			||||||
 | 
					            &previous_ranking_rules.canonical_sort,
 | 
				
			||||||
 | 
					            previous_query_index,
 | 
				
			||||||
 | 
					            previous_index_uid,
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let canonicalization_actions = ranking_rules.canonicalization_notes();
 | 
				
			||||||
 | 
					        let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut msg = String::new();
 | 
				
			||||||
 | 
					        let reason = self.reason();
 | 
				
			||||||
 | 
					        let _ = writeln!(
 | 
				
			||||||
 | 
					            &mut msg,
 | 
				
			||||||
 | 
					            "The results of queries #{previous_query_index} and #{query_index} are incompatible: "
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					        let _ = writeln!(&mut msg, "  1. {previous_rule}");
 | 
				
			||||||
 | 
					        let _ = writeln!(&mut msg, "  2. {rule}");
 | 
				
			||||||
 | 
					        let _ = writeln!(&mut msg, "  - {reason}");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if !previous_canonicalization_actions.is_empty() {
 | 
				
			||||||
 | 
					            let _ = write!(&mut msg, "  - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if !canonicalization_actions.is_empty() {
 | 
				
			||||||
 | 
					            let _ = write!(&mut msg, "  - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    pub fn reason(&self) -> &'static str {
 | 
				
			||||||
 | 
					        match (self.previous.kind, self.current.kind) {
 | 
				
			||||||
 | 
					            (RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => {
 | 
				
			||||||
 | 
					                "cannot compare a relevancy rule with a sort rule"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            (RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => {
 | 
				
			||||||
 | 
					                "cannot compare a relevancy rule with a sort rule"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => {
 | 
				
			||||||
 | 
					                "cannot compare two sort rules in opposite directions"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => {
 | 
				
			||||||
 | 
					                "cannot compare a sort rule with a geosort rule"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => {
 | 
				
			||||||
 | 
					                "cannot compare two geosort rules in opposite directions"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort)
 | 
				
			||||||
 | 
					            | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => {
 | 
				
			||||||
 | 
					                "internal error, comparison should be possible"
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl RankingRules {
 | 
				
			||||||
 | 
					    pub fn new(
 | 
				
			||||||
 | 
					        criteria: Vec<Criterion>,
 | 
				
			||||||
 | 
					        sort: Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					        terms_matching_strategy: TermsMatchingStrategy,
 | 
				
			||||||
 | 
					        canonicalization_kind: CanonicalizationKind,
 | 
				
			||||||
 | 
					    ) -> Self {
 | 
				
			||||||
 | 
					        let (canonical_criteria, canonical_sort, canonicalization_actions) =
 | 
				
			||||||
 | 
					            Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind);
 | 
				
			||||||
 | 
					        Self {
 | 
				
			||||||
 | 
					            canonical_criteria,
 | 
				
			||||||
 | 
					            canonical_sort,
 | 
				
			||||||
 | 
					            canonicalization_actions,
 | 
				
			||||||
 | 
					            source_criteria: criteria,
 | 
				
			||||||
 | 
					            source_sort: sort,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn canonicalize(
 | 
				
			||||||
 | 
					        criteria: &[Criterion],
 | 
				
			||||||
 | 
					        sort: &Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					        terms_matching_strategy: TermsMatchingStrategy,
 | 
				
			||||||
 | 
					        canonicalization_kind: CanonicalizationKind,
 | 
				
			||||||
 | 
					    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
 | 
				
			||||||
 | 
					        match canonicalization_kind {
 | 
				
			||||||
 | 
					            CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort),
 | 
				
			||||||
 | 
					            CanonicalizationKind::Keyword => {
 | 
				
			||||||
 | 
					                Self::canonicalize_keyword(criteria, sort, terms_matching_strategy)
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort),
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn canonicalize_placeholder(
 | 
				
			||||||
 | 
					        criteria: &[Criterion],
 | 
				
			||||||
 | 
					        sort_query: &Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
 | 
				
			||||||
 | 
					        let mut sort = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut sorted_fields = HashMap::new();
 | 
				
			||||||
 | 
					        let mut canonicalization_actions = Vec::new();
 | 
				
			||||||
 | 
					        let mut canonical_criteria = Vec::new();
 | 
				
			||||||
 | 
					        let mut canonical_sort = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (criterion_index, criterion) in criteria.iter().enumerate() {
 | 
				
			||||||
 | 
					            match criterion.clone() {
 | 
				
			||||||
 | 
					                Criterion::Words
 | 
				
			||||||
 | 
					                | Criterion::Typo
 | 
				
			||||||
 | 
					                | Criterion::Proximity
 | 
				
			||||||
 | 
					                | Criterion::Attribute
 | 
				
			||||||
 | 
					                | Criterion::Exactness => {
 | 
				
			||||||
 | 
					                    canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder {
 | 
				
			||||||
 | 
					                        removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                    })
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                Criterion::Sort => {
 | 
				
			||||||
 | 
					                    if let Some(previous_index) = sort {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        });
 | 
				
			||||||
 | 
					                    } else if let Some(sort_query) = sort_query {
 | 
				
			||||||
 | 
					                        sort = Some(criterion_index);
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion.clone());
 | 
				
			||||||
 | 
					                        canonical_sort = Some(canonicalize_sort(
 | 
				
			||||||
 | 
					                            &mut sorted_fields,
 | 
				
			||||||
 | 
					                            sort_query.as_slice(),
 | 
				
			||||||
 | 
					                            criterion_index,
 | 
				
			||||||
 | 
					                            &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        ));
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedSort {
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        })
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
 | 
				
			||||||
 | 
					                        .push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: *entry.get(),
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        }),
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Vacant(entry) => {
 | 
				
			||||||
 | 
					                        entry.insert(RankingRuleSource::Criterion(criterion_index));
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion.clone())
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        (canonical_criteria, canonical_sort, canonicalization_actions)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn canonicalize_vector(
 | 
				
			||||||
 | 
					        criteria: &[Criterion],
 | 
				
			||||||
 | 
					        sort_query: &Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
 | 
				
			||||||
 | 
					        let mut sort = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut sorted_fields = HashMap::new();
 | 
				
			||||||
 | 
					        let mut canonicalization_actions = Vec::new();
 | 
				
			||||||
 | 
					        let mut canonical_criteria = Vec::new();
 | 
				
			||||||
 | 
					        let mut canonical_sort = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut vector = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() {
 | 
				
			||||||
 | 
					            match criterion.clone() {
 | 
				
			||||||
 | 
					                Criterion::Words
 | 
				
			||||||
 | 
					                | Criterion::Typo
 | 
				
			||||||
 | 
					                | Criterion::Proximity
 | 
				
			||||||
 | 
					                | Criterion::Attribute
 | 
				
			||||||
 | 
					                | Criterion::Exactness => match vector {
 | 
				
			||||||
 | 
					                    Some(previous_occurrence) => {
 | 
				
			||||||
 | 
					                        if sorted_fields.is_empty() {
 | 
				
			||||||
 | 
					                            canonicalization_actions.push(CanonicalizationAction::RemovedVector {
 | 
				
			||||||
 | 
					                                vector_rule: RankingRuleSource::Criterion(previous_occurrence),
 | 
				
			||||||
 | 
					                                removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                            });
 | 
				
			||||||
 | 
					                        } else {
 | 
				
			||||||
 | 
					                            canonicalization_actions.push(
 | 
				
			||||||
 | 
					                                CanonicalizationAction::TruncatedVector {
 | 
				
			||||||
 | 
					                                    vector_rule: RankingRuleSource::Criterion(previous_occurrence),
 | 
				
			||||||
 | 
					                                    truncated_from: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                                },
 | 
				
			||||||
 | 
					                            );
 | 
				
			||||||
 | 
					                            break 'criteria;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    None => {
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion.clone());
 | 
				
			||||||
 | 
					                        vector = Some(criterion_index);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                Criterion::Sort => {
 | 
				
			||||||
 | 
					                    if let Some(previous_index) = sort {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        });
 | 
				
			||||||
 | 
					                    } else if let Some(sort_query) = sort_query {
 | 
				
			||||||
 | 
					                        sort = Some(criterion_index);
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion.clone());
 | 
				
			||||||
 | 
					                        canonical_sort = Some(canonicalize_sort(
 | 
				
			||||||
 | 
					                            &mut sorted_fields,
 | 
				
			||||||
 | 
					                            sort_query.as_slice(),
 | 
				
			||||||
 | 
					                            criterion_index,
 | 
				
			||||||
 | 
					                            &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        ));
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedSort {
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        })
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
 | 
				
			||||||
 | 
					                        .push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: *entry.get(),
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        }),
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Vacant(entry) => {
 | 
				
			||||||
 | 
					                        entry.insert(RankingRuleSource::Criterion(criterion_index));
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion.clone())
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        (canonical_criteria, canonical_sort, canonicalization_actions)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn canonicalize_keyword(
 | 
				
			||||||
 | 
					        criteria: &[Criterion],
 | 
				
			||||||
 | 
					        sort_query: &Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					        terms_matching_strategy: TermsMatchingStrategy,
 | 
				
			||||||
 | 
					    ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) {
 | 
				
			||||||
 | 
					        let mut words = None;
 | 
				
			||||||
 | 
					        let mut typo = None;
 | 
				
			||||||
 | 
					        let mut proximity = None;
 | 
				
			||||||
 | 
					        let mut sort = None;
 | 
				
			||||||
 | 
					        let mut attribute = None;
 | 
				
			||||||
 | 
					        let mut exactness = None;
 | 
				
			||||||
 | 
					        let mut sorted_fields = HashMap::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut canonical_criteria = Vec::new();
 | 
				
			||||||
 | 
					        let mut canonical_sort = None;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut canonicalization_actions = Vec::new();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (criterion_index, criterion) in criteria.iter().enumerate() {
 | 
				
			||||||
 | 
					            let criterion = criterion.clone();
 | 
				
			||||||
 | 
					            match criterion.clone() {
 | 
				
			||||||
 | 
					                Criterion::Words => {
 | 
				
			||||||
 | 
					                    if let TermsMatchingStrategy::All = terms_matching_strategy {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedWords {
 | 
				
			||||||
 | 
					                            reason: RemoveWords::MatchingStrategyAll,
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        });
 | 
				
			||||||
 | 
					                        continue;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    if let Some(maybe_previous_index) = words {
 | 
				
			||||||
 | 
					                        if let Some(previous_index) = maybe_previous_index {
 | 
				
			||||||
 | 
					                            canonicalization_actions.push(
 | 
				
			||||||
 | 
					                                CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                                    earlier_occurrence: RankingRuleSource::Criterion(
 | 
				
			||||||
 | 
					                                        previous_index,
 | 
				
			||||||
 | 
					                                    ),
 | 
				
			||||||
 | 
					                                    removed_occurrence: RankingRuleSource::Criterion(
 | 
				
			||||||
 | 
					                                        criterion_index,
 | 
				
			||||||
 | 
					                                    ),
 | 
				
			||||||
 | 
					                                },
 | 
				
			||||||
 | 
					                            );
 | 
				
			||||||
 | 
					                            continue;
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedWords {
 | 
				
			||||||
 | 
					                            reason: RemoveWords::WasPrepended,
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        })
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    words = Some(Some(criterion_index));
 | 
				
			||||||
 | 
					                    canonical_criteria.push(criterion);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Typo => {
 | 
				
			||||||
 | 
					                    canonicalize_criterion(
 | 
				
			||||||
 | 
					                        criterion,
 | 
				
			||||||
 | 
					                        criterion_index,
 | 
				
			||||||
 | 
					                        terms_matching_strategy,
 | 
				
			||||||
 | 
					                        &mut words,
 | 
				
			||||||
 | 
					                        &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        &mut canonical_criteria,
 | 
				
			||||||
 | 
					                        &mut typo,
 | 
				
			||||||
 | 
					                    );
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Proximity => {
 | 
				
			||||||
 | 
					                    canonicalize_criterion(
 | 
				
			||||||
 | 
					                        criterion,
 | 
				
			||||||
 | 
					                        criterion_index,
 | 
				
			||||||
 | 
					                        terms_matching_strategy,
 | 
				
			||||||
 | 
					                        &mut words,
 | 
				
			||||||
 | 
					                        &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        &mut canonical_criteria,
 | 
				
			||||||
 | 
					                        &mut proximity,
 | 
				
			||||||
 | 
					                    );
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Attribute => {
 | 
				
			||||||
 | 
					                    canonicalize_criterion(
 | 
				
			||||||
 | 
					                        criterion,
 | 
				
			||||||
 | 
					                        criterion_index,
 | 
				
			||||||
 | 
					                        terms_matching_strategy,
 | 
				
			||||||
 | 
					                        &mut words,
 | 
				
			||||||
 | 
					                        &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        &mut canonical_criteria,
 | 
				
			||||||
 | 
					                        &mut attribute,
 | 
				
			||||||
 | 
					                    );
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Exactness => {
 | 
				
			||||||
 | 
					                    canonicalize_criterion(
 | 
				
			||||||
 | 
					                        criterion,
 | 
				
			||||||
 | 
					                        criterion_index,
 | 
				
			||||||
 | 
					                        terms_matching_strategy,
 | 
				
			||||||
 | 
					                        &mut words,
 | 
				
			||||||
 | 
					                        &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        &mut canonical_criteria,
 | 
				
			||||||
 | 
					                        &mut exactness,
 | 
				
			||||||
 | 
					                    );
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                Criterion::Sort => {
 | 
				
			||||||
 | 
					                    if let Some(previous_index) = sort {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        });
 | 
				
			||||||
 | 
					                    } else if let Some(sort_query) = sort_query {
 | 
				
			||||||
 | 
					                        sort = Some(criterion_index);
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion);
 | 
				
			||||||
 | 
					                        canonical_sort = Some(canonicalize_sort(
 | 
				
			||||||
 | 
					                            &mut sorted_fields,
 | 
				
			||||||
 | 
					                            sort_query.as_slice(),
 | 
				
			||||||
 | 
					                            criterion_index,
 | 
				
			||||||
 | 
					                            &mut canonicalization_actions,
 | 
				
			||||||
 | 
					                        ));
 | 
				
			||||||
 | 
					                    } else {
 | 
				
			||||||
 | 
					                        canonicalization_actions.push(CanonicalizationAction::RemovedSort {
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        })
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) {
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
 | 
				
			||||||
 | 
					                        .push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: *entry.get(),
 | 
				
			||||||
 | 
					                            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					                        }),
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Vacant(entry) => {
 | 
				
			||||||
 | 
					                        entry.insert(RankingRuleSource::Criterion(criterion_index));
 | 
				
			||||||
 | 
					                        canonical_criteria.push(criterion)
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        (canonical_criteria, canonical_sort, canonicalization_actions)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> {
 | 
				
			||||||
 | 
					        for (current, previous) in self.coalesce_iterator().zip(previous.coalesce_iterator()) {
 | 
				
			||||||
 | 
					            if current.kind != previous.kind {
 | 
				
			||||||
 | 
					                return Err(CompatibilityError { current, previous });
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        Ok(())
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    pub fn constraint_count(&self) -> usize {
 | 
				
			||||||
 | 
					        self.coalesce_iterator().count()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ {
 | 
				
			||||||
 | 
					        self.canonical_criteria
 | 
				
			||||||
 | 
					            .iter()
 | 
				
			||||||
 | 
					            .enumerate()
 | 
				
			||||||
 | 
					            .flat_map(|(criterion_index, criterion)| {
 | 
				
			||||||
 | 
					                RankingRule::from_criterion(criterion_index, criterion, &self.canonical_sort)
 | 
				
			||||||
 | 
					            })
 | 
				
			||||||
 | 
					            .coalesce(
 | 
				
			||||||
 | 
					                |previous @ RankingRule { source: previous_source, kind: previous_kind },
 | 
				
			||||||
 | 
					                 current @ RankingRule { source, kind }| {
 | 
				
			||||||
 | 
					                    match (previous_kind, kind) {
 | 
				
			||||||
 | 
					                        (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) => {
 | 
				
			||||||
 | 
					                            let merged_source = match (previous_source, source) {
 | 
				
			||||||
 | 
					                                (
 | 
				
			||||||
 | 
					                                    RankingRuleSource::Criterion(previous),
 | 
				
			||||||
 | 
					                                    RankingRuleSource::Criterion(current),
 | 
				
			||||||
 | 
					                                ) => RankingRuleSource::CoalescedCriteria(previous, current),
 | 
				
			||||||
 | 
					                                (
 | 
				
			||||||
 | 
					                                    RankingRuleSource::CoalescedCriteria(begin, _end),
 | 
				
			||||||
 | 
					                                    RankingRuleSource::Criterion(current),
 | 
				
			||||||
 | 
					                                ) => RankingRuleSource::CoalescedCriteria(begin, current),
 | 
				
			||||||
 | 
					                                (_previous, current) => current,
 | 
				
			||||||
 | 
					                            };
 | 
				
			||||||
 | 
					                            Ok(RankingRule { source: merged_source, kind })
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        _ => Err((previous, current)),
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn canonicalization_notes(&self) -> String {
 | 
				
			||||||
 | 
					        use CanonicalizationAction::*;
 | 
				
			||||||
 | 
					        let mut notes = String::new();
 | 
				
			||||||
 | 
					        for (index, action) in self.canonicalization_actions.iter().enumerate() {
 | 
				
			||||||
 | 
					            let index = index + 1;
 | 
				
			||||||
 | 
					            let _ = match action {
 | 
				
			||||||
 | 
					                PrependedWords { prepended_index } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "    {index}. Prepended rule `words` before first relevancy rule `{}` at position {}",
 | 
				
			||||||
 | 
					                    prepended_index.rule_name(&self.source_criteria, &self.source_sort),
 | 
				
			||||||
 | 
					                    prepended_index.rule_position()
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "    {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}",
 | 
				
			||||||
 | 
					                    earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort),
 | 
				
			||||||
 | 
					                    removed_occurrence.rule_position(),
 | 
				
			||||||
 | 
					                    earlier_occurrence.rule_position(),
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                RemovedWords { reason, removed_occurrence } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "    {index}. Removed rule `words` at position {} because {reason}",
 | 
				
			||||||
 | 
					                    removed_occurrence.rule_position()
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                RemovedPlaceholder { removed_occurrence } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "    {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")",
 | 
				
			||||||
 | 
					                    removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
 | 
				
			||||||
 | 
					                    removed_occurrence.rule_position()
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                TruncatedVector { vector_rule, truncated_from } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "    {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}",
 | 
				
			||||||
 | 
					                    truncated_from.rule_name(&self.source_criteria, &self.source_sort),
 | 
				
			||||||
 | 
					                    truncated_from.rule_position(),
 | 
				
			||||||
 | 
					                    vector_rule.rule_position(),
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                RemovedVector { vector_rule, removed_occurrence } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "    {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}",
 | 
				
			||||||
 | 
					                    removed_occurrence.rule_name(&self.source_criteria, &self.source_sort),
 | 
				
			||||||
 | 
					                    removed_occurrence.rule_position(),
 | 
				
			||||||
 | 
					                    vector_rule.rule_position(),
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                RemovedSort { removed_occurrence } => writeln!(
 | 
				
			||||||
 | 
					                    &mut notes,
 | 
				
			||||||
 | 
					                    "   {index}. Removed rule `sort` at position {} because `query.sort` is empty",
 | 
				
			||||||
 | 
					removed_occurrence.rule_position()
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        notes
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn canonicalize_sort(
 | 
				
			||||||
 | 
					    sorted_fields: &mut HashMap<String, RankingRuleSource>,
 | 
				
			||||||
 | 
					    sort_query: &[AscDesc],
 | 
				
			||||||
 | 
					    criterion_index: usize,
 | 
				
			||||||
 | 
					    canonicalization_actions: &mut Vec<CanonicalizationAction>,
 | 
				
			||||||
 | 
					) -> Vec<AscDesc> {
 | 
				
			||||||
 | 
					    let mut geo_sorted = None;
 | 
				
			||||||
 | 
					    let mut canonical_sort = Vec::new();
 | 
				
			||||||
 | 
					    for (sort_index, asc_desc) in sort_query.iter().enumerate() {
 | 
				
			||||||
 | 
					        let source = RankingRuleSource::Sort { criterion_index, sort_index };
 | 
				
			||||||
 | 
					        let asc_desc = asc_desc.clone();
 | 
				
			||||||
 | 
					        match asc_desc.clone() {
 | 
				
			||||||
 | 
					            AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
 | 
				
			||||||
 | 
					                match sorted_fields.entry(s) {
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions
 | 
				
			||||||
 | 
					                        .push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                            earlier_occurrence: *entry.get(),
 | 
				
			||||||
 | 
					                            removed_occurrence: source,
 | 
				
			||||||
 | 
					                        }),
 | 
				
			||||||
 | 
					                    std::collections::hash_map::Entry::Vacant(entry) => {
 | 
				
			||||||
 | 
					                        entry.insert(source);
 | 
				
			||||||
 | 
					                        canonical_sort.push(asc_desc);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted {
 | 
				
			||||||
 | 
					                Some(earlier_sort_index) => {
 | 
				
			||||||
 | 
					                    canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					                        earlier_occurrence: RankingRuleSource::Sort {
 | 
				
			||||||
 | 
					                            criterion_index,
 | 
				
			||||||
 | 
					                            sort_index: earlier_sort_index,
 | 
				
			||||||
 | 
					                        },
 | 
				
			||||||
 | 
					                        removed_occurrence: source,
 | 
				
			||||||
 | 
					                    })
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                None => {
 | 
				
			||||||
 | 
					                    geo_sorted = Some(sort_index);
 | 
				
			||||||
 | 
					                    canonical_sort.push(asc_desc);
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    canonical_sort
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn canonicalize_criterion(
 | 
				
			||||||
 | 
					    criterion: Criterion,
 | 
				
			||||||
 | 
					    criterion_index: usize,
 | 
				
			||||||
 | 
					    terms_matching_strategy: TermsMatchingStrategy,
 | 
				
			||||||
 | 
					    words: &mut Option<Option<usize>>,
 | 
				
			||||||
 | 
					    canonicalization_actions: &mut Vec<CanonicalizationAction>,
 | 
				
			||||||
 | 
					    canonical_criteria: &mut Vec<Criterion>,
 | 
				
			||||||
 | 
					    rule: &mut Option<usize>,
 | 
				
			||||||
 | 
					) {
 | 
				
			||||||
 | 
					    *words = match (terms_matching_strategy, words.take()) {
 | 
				
			||||||
 | 
					        (TermsMatchingStrategy::All, words) => words,
 | 
				
			||||||
 | 
					        (_, None) => {
 | 
				
			||||||
 | 
					            // inject words
 | 
				
			||||||
 | 
					            canonicalization_actions.push(CanonicalizationAction::PrependedWords {
 | 
				
			||||||
 | 
					                prepended_index: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					            });
 | 
				
			||||||
 | 
					            canonical_criteria.push(Criterion::Words);
 | 
				
			||||||
 | 
					            Some(None)
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        (_, words) => words,
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    if let Some(previous_index) = *rule {
 | 
				
			||||||
 | 
					        canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate {
 | 
				
			||||||
 | 
					            earlier_occurrence: RankingRuleSource::Criterion(previous_index),
 | 
				
			||||||
 | 
					            removed_occurrence: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        *rule = Some(criterion_index);
 | 
				
			||||||
 | 
					        canonical_criteria.push(criterion)
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Broad family a ranking rule belongs to.
					///
					/// Rules are classified by what they order documents on, not by their exact name.
					#[derive(Debug, Clone, Copy, PartialEq, Eq)]
					enum RankingRuleKind {
					    /// A relevancy rule: `words`, `typo`, `proximity`, `attribute` or `exactness`.
					    Relevancy,
					    /// An ascending sort on a regular field.
					    AscendingSort,
					    /// A descending sort on a regular field.
					    DescendingSort,
					    /// An ascending sort on `_geo`.
					    AscendingGeoSort,
					    /// A descending sort on `_geo`.
					    DescendingGeoSort,
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#[derive(Debug, Clone, Copy)]
 | 
				
			||||||
 | 
					pub struct RankingRule {
 | 
				
			||||||
 | 
					    source: RankingRuleSource,
 | 
				
			||||||
 | 
					    kind: RankingRuleKind,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Location a ranking rule originates from.
					#[derive(Debug, Clone, Copy)]
					pub enum RankingRuleSource {
					    /// The criterion at this index in the ranking rules.
					    Criterion(usize),
					    /// Several criteria treated as one, given as an inclusive `(begin, end)` index range
					    /// in the ranking rules.
					    CoalescedCriteria(usize, usize),
					    /// An entry of the query's `sort` parameter.
					    Sort {
					        /// Index of the `sort` criterion in the ranking rules.
					        criterion_index: usize,
					        /// Index of the entry inside the query's `sort` list.
					        sort_index: usize,
					    },
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl RankingRuleSource {
 | 
				
			||||||
 | 
					    fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String {
 | 
				
			||||||
 | 
					        match self {
 | 
				
			||||||
 | 
					            RankingRuleSource::Criterion(criterion_index) => criteria
 | 
				
			||||||
 | 
					                .get(*criterion_index)
 | 
				
			||||||
 | 
					                .map(|c| c.to_string())
 | 
				
			||||||
 | 
					                .unwrap_or_else(|| "unknown".into()),
 | 
				
			||||||
 | 
					            RankingRuleSource::CoalescedCriteria(begin, end) => {
 | 
				
			||||||
 | 
					                let rules: Vec<_> = criteria
 | 
				
			||||||
 | 
					                    .get(*begin..=*end)
 | 
				
			||||||
 | 
					                    .iter()
 | 
				
			||||||
 | 
					                    .flat_map(|c| c.iter())
 | 
				
			||||||
 | 
					                    .map(|c| c.to_string())
 | 
				
			||||||
 | 
					                    .collect();
 | 
				
			||||||
 | 
					                rules.join(", ")
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            RankingRuleSource::Sort { criterion_index: _, sort_index } => {
 | 
				
			||||||
 | 
					                match sort.as_deref().and_then(|sort| sort.get(*sort_index)) {
 | 
				
			||||||
 | 
					                    Some(sort) => match sort {
 | 
				
			||||||
 | 
					                        AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"),
 | 
				
			||||||
 | 
					                        AscDesc::Desc(Member::Field(field_name)) => {
 | 
				
			||||||
 | 
					                            format!("{field_name}:desc")
 | 
				
			||||||
 | 
					                        }
 | 
				
			||||||
 | 
					                        AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(),
 | 
				
			||||||
 | 
					                        AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(),
 | 
				
			||||||
 | 
					                    },
 | 
				
			||||||
 | 
					                    None => "unknown".into(),
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn rule_position(&self) -> String {
 | 
				
			||||||
 | 
					        match self {
 | 
				
			||||||
 | 
					            RankingRuleSource::Criterion(criterion_index) => {
 | 
				
			||||||
 | 
					                format!("#{criterion_index} in ranking rules")
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            RankingRuleSource::CoalescedCriteria(begin, end) => {
 | 
				
			||||||
 | 
					                format!("#{begin} to #{end} in ranking rules")
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            RankingRuleSource::Sort { criterion_index, sort_index } => format!(
 | 
				
			||||||
 | 
					                "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)"
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl RankingRule {
 | 
				
			||||||
 | 
					    fn from_criterion<'a>(
 | 
				
			||||||
 | 
					        criterion_index: usize,
 | 
				
			||||||
 | 
					        criterion: &'a Criterion,
 | 
				
			||||||
 | 
					        sort: &'a Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					    ) -> impl Iterator<Item = Self> + 'a {
 | 
				
			||||||
 | 
					        let kind = match criterion {
 | 
				
			||||||
 | 
					            Criterion::Words
 | 
				
			||||||
 | 
					            | Criterion::Typo
 | 
				
			||||||
 | 
					            | Criterion::Proximity
 | 
				
			||||||
 | 
					            | Criterion::Attribute
 | 
				
			||||||
 | 
					            | Criterion::Exactness => RankingRuleKind::Relevancy,
 | 
				
			||||||
 | 
					            Criterion::Asc(s) if s == "_geo" => RankingRuleKind::AscendingGeoSort,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            Criterion::Asc(_) => RankingRuleKind::AscendingSort,
 | 
				
			||||||
 | 
					            Criterion::Desc(s) if s == "_geo" => RankingRuleKind::DescendingGeoSort,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            Criterion::Desc(_) => RankingRuleKind::DescendingSort,
 | 
				
			||||||
 | 
					            Criterion::Sort => {
 | 
				
			||||||
 | 
					                return either::Right(sort.iter().flatten().enumerate().map(
 | 
				
			||||||
 | 
					                    move |(rule_index, asc_desc)| {
 | 
				
			||||||
 | 
					                        Self::from_asc_desc(asc_desc, criterion_index, rule_index)
 | 
				
			||||||
 | 
					                    },
 | 
				
			||||||
 | 
					                ))
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        either::Left(std::iter::once(Self {
 | 
				
			||||||
 | 
					            source: RankingRuleSource::Criterion(criterion_index),
 | 
				
			||||||
 | 
					            kind,
 | 
				
			||||||
 | 
					        }))
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self {
 | 
				
			||||||
 | 
					        let kind = match asc_desc {
 | 
				
			||||||
 | 
					            AscDesc::Asc(Member::Field(_)) => RankingRuleKind::AscendingSort,
 | 
				
			||||||
 | 
					            AscDesc::Desc(Member::Field(_)) => RankingRuleKind::DescendingSort,
 | 
				
			||||||
 | 
					            AscDesc::Asc(Member::Geo(_)) => RankingRuleKind::AscendingGeoSort,
 | 
				
			||||||
 | 
					            AscDesc::Desc(Member::Geo(_)) => RankingRuleKind::DescendingGeoSort,
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        Self {
 | 
				
			||||||
 | 
					            source: RankingRuleSource::Sort {
 | 
				
			||||||
 | 
					                criterion_index: sort_index,
 | 
				
			||||||
 | 
					                sort_index: rule_index_in_sort,
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            kind,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn as_string(
 | 
				
			||||||
 | 
					        &self,
 | 
				
			||||||
 | 
					        canonical_criteria: &[Criterion],
 | 
				
			||||||
 | 
					        canonical_sort: &Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					        query_index: usize,
 | 
				
			||||||
 | 
					        index_uid: &str,
 | 
				
			||||||
 | 
					    ) -> String {
 | 
				
			||||||
 | 
					        let kind = match self.kind {
 | 
				
			||||||
 | 
					            RankingRuleKind::Relevancy => "relevancy",
 | 
				
			||||||
 | 
					            RankingRuleKind::AscendingSort => "ascending sort",
 | 
				
			||||||
 | 
					            RankingRuleKind::DescendingSort => "descending sort",
 | 
				
			||||||
 | 
					            RankingRuleKind::AscendingGeoSort => "ascending geo sort",
 | 
				
			||||||
 | 
					            RankingRuleKind::DescendingGeoSort => "descending geo sort",
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        let rules = self.fetch_from_source(canonical_criteria, canonical_sort);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let source = match self.source {
 | 
				
			||||||
 | 
					            RankingRuleSource::Criterion(criterion_index) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
 | 
				
			||||||
 | 
					            RankingRuleSource::CoalescedCriteria(begin, end) => format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`"),
 | 
				
			||||||
 | 
					            RankingRuleSource::Sort { criterion_index, sort_index } => format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`"),
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        format!("{source}: {kind} {rules}")
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn fetch_from_source(
 | 
				
			||||||
 | 
					        &self,
 | 
				
			||||||
 | 
					        canonical_criteria: &[Criterion],
 | 
				
			||||||
 | 
					        canonical_sort: &Option<Vec<AscDesc>>,
 | 
				
			||||||
 | 
					    ) -> String {
 | 
				
			||||||
 | 
					        let rule_name = match self.source {
 | 
				
			||||||
 | 
					            RankingRuleSource::Criterion(index) => {
 | 
				
			||||||
 | 
					                canonical_criteria.get(index).map(|criterion| criterion.to_string())
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            RankingRuleSource::CoalescedCriteria(begin, end) => {
 | 
				
			||||||
 | 
					                let rules: Vec<String> = canonical_criteria
 | 
				
			||||||
 | 
					                    .get(begin..=end)
 | 
				
			||||||
 | 
					                    .into_iter()
 | 
				
			||||||
 | 
					                    .flat_map(|criteria| criteria.iter())
 | 
				
			||||||
 | 
					                    .map(|criterion| criterion.to_string())
 | 
				
			||||||
 | 
					                    .collect();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                (!rules.is_empty()).then_some(rules.join(", "))
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            RankingRuleSource::Sort { criterion_index: _, sort_index } => canonical_sort
 | 
				
			||||||
 | 
					                .as_deref()
 | 
				
			||||||
 | 
					                .and_then(|canonical_sort| canonical_sort.get(sort_index))
 | 
				
			||||||
 | 
					                .and_then(|asc_desc: &AscDesc| match asc_desc {
 | 
				
			||||||
 | 
					                    AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => {
 | 
				
			||||||
 | 
					                        Some(format!("on field `{s}`"))
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ => None,
 | 
				
			||||||
 | 
					                }),
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let rule_name = rule_name.unwrap_or_else(|| "default".into());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        format!("rule(s) {rule_name}")
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Reference in New Issue
	
	Block a user