mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 04:56:28 +00:00 
			
		
		
		
	Merge #4769
4769: Federated search r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes #4747 [Usage](https://meilisearch.notion.site/v1-10-federated-search-698dfe36ab6b4668b044f735fb40f0b2) ## What does this PR do? - multi-search now allows a top-level federation object. When not `null`, the results of multi-search are modified to be a single list of results rather than a list of lists of results - changed lifetimes around tokenizer et al. to be able to make hits one by one rather than using a vector - adds `roaring` to Meilisearch itself. As the federated search happens at the Meilisearch level (reuses the search functions declared at the Meilisearch level + merge happens after the hits were created), `RoaringBitmap`s are needed to track the candidates: hits that were seen, all candidates. - Refactor `make_hits` to allow for an individual, optimized `make_hit` - Score details comparison no longer fails when sorting on different field names or target point (for geo) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		
							
								
								
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										1
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -3394,6 +3394,7 @@ dependencies = [ | ||||
|  "rayon", | ||||
|  "regex", | ||||
|  "reqwest", | ||||
|  "roaring", | ||||
|  "rustls 0.21.12", | ||||
|  "rustls-pemfile 1.0.4", | ||||
|  "segment", | ||||
|   | ||||
| @@ -192,6 +192,7 @@ merge_with_error_impl_take_error_message!(ParseOffsetDateTimeError); | ||||
| merge_with_error_impl_take_error_message!(ParseTaskKindError); | ||||
| merge_with_error_impl_take_error_message!(ParseTaskStatusError); | ||||
| merge_with_error_impl_take_error_message!(IndexUidFormatError); | ||||
| merge_with_error_impl_take_error_message!(InvalidMultiSearchWeight); | ||||
| merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); | ||||
| merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); | ||||
| merge_with_error_impl_take_error_message!(InvalidSimilarRankingScoreThreshold); | ||||
|   | ||||
| @@ -238,6 +238,11 @@ InvalidIndexLimit                     , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidIndexOffset                    , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidIndexPrimaryKey                , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidIndexUid                       , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchFederated           , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchFederationOptions   , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidSearchAttributesToSearchOn     , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidSearchAttributesToCrop         , InvalidRequest       , BAD_REQUEST ; | ||||
| InvalidSearchAttributesToHighlight    , InvalidRequest       , BAD_REQUEST ; | ||||
| @@ -512,6 +517,12 @@ impl fmt::Display for deserr_codes::InvalidSearchSemanticRatio { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Human-readable message written when a `weight` value is rejected. | ||||
| impl fmt::Display for deserr_codes::InvalidMultiSearchWeight { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         write!(f, "the value of `weight` is invalid, expected a positive float (>= 0.0).") | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl fmt::Display for deserr_codes::InvalidSimilarId { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         write!( | ||||
|   | ||||
| @@ -102,6 +102,7 @@ tracing-subscriber = { version = "0.3.18", features = ["json"] } | ||||
| tracing-trace = { version = "0.1.0", path = "../tracing-trace" } | ||||
| tracing-actix-web = "0.7.11" | ||||
| build-info = { version = "1.7.0", path = "../build-info" } | ||||
| roaring = "0.10.2" | ||||
|  | ||||
| [dev-dependencies] | ||||
| actix-rt = "2.10.0" | ||||
|   | ||||
| @@ -42,7 +42,7 @@ pub struct MultiSearchAggregator; | ||||
|  | ||||
| #[allow(dead_code)] | ||||
| impl MultiSearchAggregator { | ||||
|     pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self { | ||||
|     pub fn from_federated_search(_: &dyn Any, _: &dyn Any) -> Self { | ||||
|         Self | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -34,8 +34,8 @@ use crate::routes::indexes::documents::{DocumentEditionByFunction, UpdateDocumen | ||||
| use crate::routes::indexes::facet_search::FacetSearchQuery; | ||||
| use crate::routes::{create_all_stats, Stats}; | ||||
| use crate::search::{ | ||||
|     FacetSearchResult, MatchingStrategy, SearchQuery, SearchQueryWithIndex, SearchResult, | ||||
|     SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, | ||||
|     FacetSearchResult, FederatedSearch, MatchingStrategy, SearchQuery, SearchQueryWithIndex, | ||||
|     SearchResult, SimilarQuery, SimilarResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, | ||||
|     DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, | ||||
|     DEFAULT_SEMANTIC_RATIO, | ||||
| }; | ||||
| @@ -1095,22 +1095,33 @@ pub struct MultiSearchAggregator { | ||||
|     show_ranking_score: bool, | ||||
|     show_ranking_score_details: bool, | ||||
|  | ||||
|     // federation | ||||
|     use_federation: bool, | ||||
|  | ||||
|     // context | ||||
|     user_agents: HashSet<String>, | ||||
| } | ||||
|  | ||||
| impl MultiSearchAggregator { | ||||
|     pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self { | ||||
|     pub fn from_federated_search( | ||||
|         federated_search: &FederatedSearch, | ||||
|         request: &HttpRequest, | ||||
|     ) -> Self { | ||||
|         let timestamp = Some(OffsetDateTime::now_utc()); | ||||
|  | ||||
|         let user_agents = extract_user_agents(request).into_iter().collect(); | ||||
|  | ||||
|         let distinct_indexes: HashSet<_> = query | ||||
|         let use_federation = federated_search.federation.is_some(); | ||||
|  | ||||
|         let distinct_indexes: HashSet<_> = federated_search | ||||
|             .queries | ||||
|             .iter() | ||||
|             .map(|query| { | ||||
|                 let query = &query; | ||||
|                 // make sure we get a compilation error if a field gets added to / removed from SearchQueryWithIndex | ||||
|                 let SearchQueryWithIndex { | ||||
|                     index_uid, | ||||
|                     federation_options: _, | ||||
|                     q: _, | ||||
|                     vector: _, | ||||
|                     offset: _, | ||||
| @@ -1142,8 +1153,10 @@ impl MultiSearchAggregator { | ||||
|             }) | ||||
|             .collect(); | ||||
|  | ||||
|         let show_ranking_score = query.iter().any(|query| query.show_ranking_score); | ||||
|         let show_ranking_score_details = query.iter().any(|query| query.show_ranking_score_details); | ||||
|         let show_ranking_score = | ||||
|             federated_search.queries.iter().any(|query| query.show_ranking_score); | ||||
|         let show_ranking_score_details = | ||||
|             federated_search.queries.iter().any(|query| query.show_ranking_score_details); | ||||
|  | ||||
|         Self { | ||||
|             timestamp, | ||||
| @@ -1151,10 +1164,11 @@ impl MultiSearchAggregator { | ||||
|             total_succeeded: 0, | ||||
|             total_distinct_index_count: distinct_indexes.len(), | ||||
|             total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 }, | ||||
|             total_search_count: query.len(), | ||||
|             total_search_count: federated_search.queries.len(), | ||||
|             show_ranking_score, | ||||
|             show_ranking_score_details, | ||||
|             user_agents, | ||||
|             use_federation, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -1180,6 +1194,7 @@ impl MultiSearchAggregator { | ||||
|         let show_ranking_score_details = | ||||
|             this.show_ranking_score_details || other.show_ranking_score_details; | ||||
|         let mut user_agents = this.user_agents; | ||||
|         let use_federation = this.use_federation || other.use_federation; | ||||
|  | ||||
|         for user_agent in other.user_agents.into_iter() { | ||||
|             user_agents.insert(user_agent); | ||||
| @@ -1196,6 +1211,7 @@ impl MultiSearchAggregator { | ||||
|             user_agents, | ||||
|             show_ranking_score, | ||||
|             show_ranking_score_details, | ||||
|             use_federation, | ||||
|             // do not add _ or ..Default::default() here | ||||
|         }; | ||||
|  | ||||
| @@ -1214,6 +1230,7 @@ impl MultiSearchAggregator { | ||||
|             user_agents, | ||||
|             show_ranking_score, | ||||
|             show_ranking_score_details, | ||||
|             use_federation, | ||||
|         } = self; | ||||
|  | ||||
|         if total_received == 0 { | ||||
| @@ -1238,6 +1255,9 @@ impl MultiSearchAggregator { | ||||
|                 "scoring": { | ||||
|                     "show_ranking_score": show_ranking_score, | ||||
|                     "show_ranking_score_details": show_ranking_score_details, | ||||
|                 }, | ||||
|                 "federation": { | ||||
|                     "use_federation": use_federation, | ||||
|                 } | ||||
|             }); | ||||
|  | ||||
|   | ||||
| @@ -25,6 +25,10 @@ pub enum MeilisearchHttpError { | ||||
|     DocumentNotFound(String), | ||||
|     #[error("Sending an empty filter is forbidden.")] | ||||
|     EmptyFilter, | ||||
|     #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] | ||||
|     FederationOptionsInNonFederatedRequest(usize), | ||||
|     #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] | ||||
|     PaginationInFederatedQuery(usize, &'static str), | ||||
|     #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] | ||||
|     InvalidExpression(&'static [&'static str], Value), | ||||
|     #[error("A {0} payload is missing.")] | ||||
| @@ -86,6 +90,12 @@ impl ErrorCode for MeilisearchHttpError { | ||||
|             MeilisearchHttpError::DocumentFormat(e) => e.error_code(), | ||||
|             MeilisearchHttpError::Join(_) => Code::Internal, | ||||
|             MeilisearchHttpError::MissingSearchHybrid => Code::MissingSearchHybrid, | ||||
|             MeilisearchHttpError::FederationOptionsInNonFederatedRequest(_) => { | ||||
|                 Code::InvalidMultiSearchFederationOptions | ||||
|             } | ||||
|             MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { | ||||
|                 Code::InvalidMultiSearchQueryPagination | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -10,12 +10,14 @@ use serde::Serialize; | ||||
| use tracing::debug; | ||||
|  | ||||
| use crate::analytics::{Analytics, MultiSearchAggregator}; | ||||
| use crate::error::MeilisearchHttpError; | ||||
| use crate::extractors::authentication::policies::ActionPolicy; | ||||
| use crate::extractors::authentication::{AuthenticationError, GuardedData}; | ||||
| use crate::extractors::sequential_extractor::SeqHandler; | ||||
| use crate::routes::indexes::search::search_kind; | ||||
| use crate::search::{ | ||||
|     add_search_rules, perform_search, RetrieveVectors, SearchQueryWithIndex, SearchResultWithIndex, | ||||
|     add_search_rules, perform_federated_search, perform_search, FederatedSearch, RetrieveVectors, | ||||
|     SearchQueryWithIndex, SearchResultWithIndex, | ||||
| }; | ||||
| use crate::search_queue::SearchQueue; | ||||
|  | ||||
| @@ -28,85 +30,44 @@ struct SearchResults { | ||||
|     results: Vec<SearchResultWithIndex>, | ||||
| } | ||||
|  | ||||
| #[derive(Debug, deserr::Deserr)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| pub struct SearchQueries { | ||||
|     queries: Vec<SearchQueryWithIndex>, | ||||
| } | ||||
|  | ||||
| pub async fn multi_search_with_post( | ||||
|     index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>, | ||||
|     search_queue: Data<SearchQueue>, | ||||
|     params: AwebJson<SearchQueries, DeserrJsonError>, | ||||
|     params: AwebJson<FederatedSearch, DeserrJsonError>, | ||||
|     req: HttpRequest, | ||||
|     analytics: web::Data<dyn Analytics>, | ||||
| ) -> Result<HttpResponse, ResponseError> { | ||||
|     let queries = params.into_inner().queries; | ||||
|  | ||||
|     let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req); | ||||
|     let features = index_scheduler.features(); | ||||
|  | ||||
|     // Since we don't want to process half of the search requests and then get a permit refused | ||||
|     // we're going to get one permit for the whole duration of the multi-search request. | ||||
|     let _permit = search_queue.try_get_search_permit().await?; | ||||
|  | ||||
|     // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, | ||||
|     // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code | ||||
|     // changes. | ||||
|     let search_results: Result<_, (ResponseError, usize)> = async { | ||||
|         let mut search_results = Vec::with_capacity(queries.len()); | ||||
|         for (query_index, (index_uid, mut query)) in | ||||
|             queries.into_iter().map(SearchQueryWithIndex::into_index_query).enumerate() | ||||
|         { | ||||
|             debug!(on_index = query_index, parameters = ?query, "Multi-search"); | ||||
|     let federated_search = params.into_inner(); | ||||
|  | ||||
|     let mut multi_aggregate = MultiSearchAggregator::from_federated_search(&federated_search, &req); | ||||
|  | ||||
|     let FederatedSearch { mut queries, federation } = federated_search; | ||||
|  | ||||
|     let features = index_scheduler.features(); | ||||
|  | ||||
|     // regardless of federation, check authorization and apply search rules | ||||
|     let auth = 'check_authorization: { | ||||
|         for (query_index, federated_query) in queries.iter_mut().enumerate() { | ||||
|             let index_uid = federated_query.index_uid.as_str(); | ||||
|             // Check index from API key | ||||
|             if !index_scheduler.filters().is_index_authorized(&index_uid) { | ||||
|                 return Err(AuthenticationError::InvalidToken).with_index(query_index); | ||||
|             if !index_scheduler.filters().is_index_authorized(index_uid) { | ||||
|                 break 'check_authorization Err(AuthenticationError::InvalidToken) | ||||
|                     .with_index(query_index); | ||||
|             } | ||||
|             // Apply search rules from tenant token | ||||
|             if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) | ||||
|             if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(index_uid) | ||||
|             { | ||||
|                 add_search_rules(&mut query.filter, search_rules); | ||||
|                 add_search_rules(&mut federated_query.filter, search_rules); | ||||
|             } | ||||
|  | ||||
|             let index = index_scheduler | ||||
|                 .index(&index_uid) | ||||
|                 .map_err(|err| { | ||||
|                     let mut err = ResponseError::from(err); | ||||
|                     // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||
|                     // here the resource not found is not part of the URL. | ||||
|                     err.code = StatusCode::BAD_REQUEST; | ||||
|                     err | ||||
|                 }) | ||||
|                 .with_index(query_index)?; | ||||
|  | ||||
|             let search_kind = search_kind(&query, index_scheduler.get_ref(), &index, features) | ||||
|                 .with_index(query_index)?; | ||||
|             let retrieve_vector = | ||||
|                 RetrieveVectors::new(query.retrieve_vectors, features).with_index(query_index)?; | ||||
|  | ||||
|             let search_result = tokio::task::spawn_blocking(move || { | ||||
|                 perform_search(&index, query, search_kind, retrieve_vector) | ||||
|             }) | ||||
|             .await | ||||
|             .with_index(query_index)?; | ||||
|  | ||||
|             search_results.push(SearchResultWithIndex { | ||||
|                 index_uid: index_uid.into_inner(), | ||||
|                 result: search_result.with_index(query_index)?, | ||||
|             }); | ||||
|         } | ||||
|         Ok(search_results) | ||||
|     } | ||||
|     .await; | ||||
|         Ok(()) | ||||
|     }; | ||||
|  | ||||
|     if search_results.is_ok() { | ||||
|         multi_aggregate.succeed(); | ||||
|     } | ||||
|     analytics.post_multi_search(multi_aggregate); | ||||
|  | ||||
|     let search_results = search_results.map_err(|(mut err, query_index)| { | ||||
|     auth.map_err(|(mut err, query_index)| { | ||||
|         // Add the query index that failed as context for the error message. | ||||
|         // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type | ||||
|         // of result and we can benefit from static typing. | ||||
| @@ -114,9 +75,95 @@ pub async fn multi_search_with_post( | ||||
|         err | ||||
|     })?; | ||||
|  | ||||
|     debug!(returns = ?search_results, "Multi-search"); | ||||
|     let response = match federation { | ||||
|         Some(federation) => { | ||||
|             let search_result = tokio::task::spawn_blocking(move || { | ||||
|                 perform_federated_search(&index_scheduler, queries, federation, features) | ||||
|             }) | ||||
|             .await; | ||||
|  | ||||
|     Ok(HttpResponse::Ok().json(SearchResults { results: search_results })) | ||||
|             if let Ok(Ok(_)) = search_result { | ||||
|                 multi_aggregate.succeed(); | ||||
|             } | ||||
|  | ||||
|             analytics.post_multi_search(multi_aggregate); | ||||
|             HttpResponse::Ok().json(search_result??) | ||||
|         } | ||||
|         None => { | ||||
|             // Explicitly expect a `(ResponseError, usize)` for the error type rather than `ResponseError` only, | ||||
|             // so that `?` doesn't work if it doesn't use `with_index`, ensuring that it is not forgotten in case of code | ||||
|             // changes. | ||||
|             let search_results: Result<_, (ResponseError, usize)> = async { | ||||
|                 let mut search_results = Vec::with_capacity(queries.len()); | ||||
|                 for (query_index, (index_uid, query, federation_options)) in queries | ||||
|                     .into_iter() | ||||
|                     .map(SearchQueryWithIndex::into_index_query_federation) | ||||
|                     .enumerate() | ||||
|                 { | ||||
|                     debug!(on_index = query_index, parameters = ?query, "Multi-search"); | ||||
|  | ||||
|                     if federation_options.is_some() { | ||||
|                         return Err(( | ||||
|                             MeilisearchHttpError::FederationOptionsInNonFederatedRequest( | ||||
|                                 query_index, | ||||
|                             ) | ||||
|                             .into(), | ||||
|                             query_index, | ||||
|                         )); | ||||
|                     } | ||||
|  | ||||
|                     let index = index_scheduler | ||||
|                         .index(&index_uid) | ||||
|                         .map_err(|err| { | ||||
|                             let mut err = ResponseError::from(err); | ||||
|                             // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||
|                             // here the resource not found is not part of the URL. | ||||
|                             err.code = StatusCode::BAD_REQUEST; | ||||
|                             err | ||||
|                         }) | ||||
|                         .with_index(query_index)?; | ||||
|  | ||||
|                     let search_kind = | ||||
|                         search_kind(&query, index_scheduler.get_ref(), &index, features) | ||||
|                             .with_index(query_index)?; | ||||
|                     let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features) | ||||
|                         .with_index(query_index)?; | ||||
|  | ||||
|                     let search_result = tokio::task::spawn_blocking(move || { | ||||
|                         perform_search(&index, query, search_kind, retrieve_vector) | ||||
|                     }) | ||||
|                     .await | ||||
|                     .with_index(query_index)?; | ||||
|  | ||||
|                     search_results.push(SearchResultWithIndex { | ||||
|                         index_uid: index_uid.into_inner(), | ||||
|                         result: search_result.with_index(query_index)?, | ||||
|                     }); | ||||
|                 } | ||||
|                 Ok(search_results) | ||||
|             } | ||||
|             .await; | ||||
|  | ||||
|             if search_results.is_ok() { | ||||
|                 multi_aggregate.succeed(); | ||||
|             } | ||||
|             analytics.post_multi_search(multi_aggregate); | ||||
|  | ||||
|             let search_results = search_results.map_err(|(mut err, query_index)| { | ||||
|                 // Add the query index that failed as context for the error message. | ||||
|                 // We're doing it only here and not directly in the `WithIndex` trait so that the `with_index` function returns a different type | ||||
|                 // of result and we can benefit from static typing. | ||||
|                 err.message = format!("Inside `.queries[{query_index}]`: {}", err.message); | ||||
|                 err | ||||
|             })?; | ||||
|  | ||||
|             debug!(returns = ?search_results, "Multi-search"); | ||||
|  | ||||
|             HttpResponse::Ok().json(SearchResults { results: search_results }) | ||||
|         } | ||||
|     }; | ||||
|  | ||||
|     Ok(response) | ||||
| } | ||||
|  | ||||
| /// Local `Result` extension trait to avoid `map_err` boilerplate. | ||||
|   | ||||
							
								
								
									
										629
									
								
								meilisearch/src/search/federated.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										629
									
								
								meilisearch/src/search/federated.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,629 @@ | ||||
| use std::cmp::Ordering; | ||||
| use std::collections::BTreeMap; | ||||
| use std::fmt; | ||||
| use std::iter::Zip; | ||||
| use std::rc::Rc; | ||||
| use std::str::FromStr as _; | ||||
| use std::time::Duration; | ||||
| use std::vec::{IntoIter, Vec}; | ||||
|  | ||||
| use actix_http::StatusCode; | ||||
| use index_scheduler::{IndexScheduler, RoFeatures}; | ||||
| use meilisearch_types::deserr::DeserrJsonError; | ||||
| use meilisearch_types::error::deserr_codes::{ | ||||
|     InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, | ||||
| }; | ||||
| use meilisearch_types::error::ResponseError; | ||||
| use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; | ||||
| use meilisearch_types::milli::{self, DocumentId, TimeBudget}; | ||||
| use roaring::RoaringBitmap; | ||||
| use serde::Serialize; | ||||
|  | ||||
| use super::ranking_rules::{self, RankingRules}; | ||||
| use super::{ | ||||
|     prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, | ||||
|     SearchQuery, SearchQueryWithIndex, | ||||
| }; | ||||
| use crate::error::MeilisearchHttpError; | ||||
| use crate::routes::indexes::search::search_kind; | ||||
|  | ||||
| /// Value wrapped by `Weight::default()`, i.e. the weight used when none is provided. | ||||
| pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0; | ||||
|  | ||||
| /// Per-query federation settings (the `federation_options` carried by each query). | ||||
| /// | ||||
| /// Deserialized with camelCase field names; unknown fields are rejected. | ||||
| #[derive(Debug, Default, Clone, Copy, PartialEq, deserr::Deserr)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| pub struct FederationOptions { | ||||
|     /// Multiplier applied to this query's scores when they are compared by `WeightedScore`. | ||||
|     /// Falls back to `Weight::default()` when omitted; deserialization errors on this | ||||
|     /// field are reported as `InvalidMultiSearchWeight`. | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchWeight>)] | ||||
|     pub weight: Weight, | ||||
| } | ||||
|  | ||||
| /// Newtype around an `f64` weight. | ||||
| /// | ||||
| /// Deserialization goes through `TryFrom<f64>`, so a value rejected by that conversion | ||||
| /// surfaces as `InvalidMultiSearchWeight`. | ||||
| #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] | ||||
| #[deserr(try_from(f64) = TryFrom::try_from -> InvalidMultiSearchWeight)] | ||||
| pub struct Weight(f64); | ||||
|  | ||||
| /// The default weight is `DEFAULT_FEDERATED_WEIGHT`. | ||||
| impl Default for Weight { | ||||
|     fn default() -> Self { | ||||
|         Weight(DEFAULT_FEDERATED_WEIGHT) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Validating conversion used when deserializing a `weight`. | ||||
| /// | ||||
| /// # Errors | ||||
| /// | ||||
| /// Returns `InvalidMultiSearchWeight` when `f` is negative, NaN or infinite. | ||||
| impl std::convert::TryFrom<f64> for Weight { | ||||
|     type Error = InvalidMultiSearchWeight; | ||||
|  | ||||
|     fn try_from(f: f64) -> Result<Self, Self::Error> { | ||||
|         // A plain `f < 0.0` check lets NaN through (every comparison with NaN is false), | ||||
|         // and an infinite weight multiplied by a score of 0.0 yields NaN as well. | ||||
|         // Weighted scores are later compared with `partial_cmp(..).unwrap()`, which | ||||
|         // panics on NaN, so only finite, non-negative weights are accepted. | ||||
|         if f.is_finite() && f >= 0.0 { | ||||
|             Ok(Weight(f)) | ||||
|         } else { | ||||
|             Err(InvalidMultiSearchWeight) | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Gives read-only access to the wrapped `f64` (e.g. `*weight`). | ||||
| impl std::ops::Deref for Weight { | ||||
|     type Target = f64; | ||||
|  | ||||
|     fn deref(&self) -> &Self::Target { | ||||
|         &self.0 | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Top-level `federation` object of a multi-search request. | ||||
| /// | ||||
| /// Deserialized with camelCase field names; unknown fields are rejected. | ||||
| // NOTE(review): `limit`/`offset` presumably paginate the merged list of hits — confirm | ||||
| // against `perform_federated_search`. | ||||
| #[derive(Debug, deserr::Deserr)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| pub struct Federation { | ||||
|     /// Defaults to `super::DEFAULT_SEARCH_LIMIT()`; errors are reported as `InvalidSearchLimit`. | ||||
|     #[deserr(default = super::DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] | ||||
|     pub limit: usize, | ||||
|     /// Defaults to `super::DEFAULT_SEARCH_OFFSET()`; errors are reported as `InvalidSearchOffset`. | ||||
|     #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] | ||||
|     pub offset: usize, | ||||
| } | ||||
|  | ||||
| /// Request payload made of a list of queries and an optional `federation` object. | ||||
| /// | ||||
| /// Deserialized with camelCase field names; unknown fields are rejected. | ||||
| #[derive(Debug, deserr::Deserr)] | ||||
| #[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] | ||||
| pub struct FederatedSearch { | ||||
|     /// The individual queries, each carrying its own index uid. | ||||
|     pub queries: Vec<SearchQueryWithIndex>, | ||||
|     /// `None` when the field is absent from the payload (`#[deserr(default)]`). | ||||
|     #[deserr(default)] | ||||
|     pub federation: Option<Federation>, | ||||
| } | ||||
| /// Serialized response of a federated search (camelCase keys). | ||||
| #[derive(Serialize, Clone, PartialEq)] | ||||
| #[serde(rename_all = "camelCase")] | ||||
| pub struct FederatedSearchResult { | ||||
|     /// The hits returned to the client. | ||||
|     pub hits: Vec<SearchHit>, | ||||
|     pub processing_time_ms: u128, | ||||
|     /// Flattened: its fields appear at the same level as `hits` in the output. | ||||
|     #[serde(flatten)] | ||||
|     pub hits_info: HitsInfo, | ||||
|  | ||||
|     /// Omitted from the output when `None`. | ||||
|     #[serde(skip_serializing_if = "Option::is_none")] | ||||
|     pub semantic_hit_count: Option<u32>, | ||||
|  | ||||
|     // These fields are only used for analytics purposes | ||||
|     #[serde(skip)] | ||||
|     pub degraded: bool, | ||||
|     #[serde(skip)] | ||||
|     pub used_negative_operator: bool, | ||||
| } | ||||
|  | ||||
| /// Compact `Debug` output: prints the number of hits rather than their contents, and | ||||
| /// only mentions `used_negative_operator`, `degraded` and `semantic_hit_count` when set. | ||||
| impl fmt::Debug for FederatedSearchResult { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||
|         // Exhaustive destructuring: adding a field to the struct is a compile error here | ||||
|         // until this impl is updated. | ||||
|         let FederatedSearchResult { | ||||
|             hits, | ||||
|             processing_time_ms, | ||||
|             hits_info, | ||||
|             semantic_hit_count, | ||||
|             degraded, | ||||
|             used_negative_operator, | ||||
|         } = self; | ||||
|  | ||||
|         // Use the real type name so the output is not mistaken for a plain `SearchResult`. | ||||
|         let mut debug = f.debug_struct("FederatedSearchResult"); | ||||
|         // The most important thing when looking at a search result is the time it took to process | ||||
|         debug.field("processing_time_ms", &processing_time_ms); | ||||
|         debug.field("hits", &format!("[{} hits returned]", hits.len())); | ||||
|         debug.field("hits_info", &hits_info); | ||||
|         if *used_negative_operator { | ||||
|             debug.field("used_negative_operator", used_negative_operator); | ||||
|         } | ||||
|         if *degraded { | ||||
|             debug.field("degraded", degraded); | ||||
|         } | ||||
|         if let Some(semantic_hit_count) = semantic_hit_count { | ||||
|             debug.field("semantic_hit_count", &semantic_hit_count); | ||||
|         } | ||||
|  | ||||
|         debug.finish() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Borrowed score details of one document paired with the weight to apply to them. | ||||
| struct WeightedScore<'a> { | ||||
|     details: &'a [ScoreDetails], | ||||
|     weight: f64, | ||||
| } | ||||
|  | ||||
| impl<'a> WeightedScore<'a> { | ||||
|     /// Pairs `details` with `weight`; nothing is computed at construction time. | ||||
|     pub fn new(details: &'a [ScoreDetails], weight: f64) -> Self { | ||||
|         Self { details, weight } | ||||
|     } | ||||
|  | ||||
|     /// Global score of the details, multiplied by the weight. | ||||
|     pub fn weighted_global_score(&self) -> f64 { | ||||
|         ScoreDetails::global_score(self.details.iter()) * self.weight | ||||
|     } | ||||
|  | ||||
|     /// Orders two values by their weighted global score. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if either weighted global score is NaN (`partial_cmp` returns `None`). | ||||
|     pub fn compare_weighted_global_scores(&self, other: &Self) -> Ordering { | ||||
|         self.weighted_global_score() | ||||
|             .partial_cmp(&other.weighted_global_score()) | ||||
|             // both are numbers, possibly infinite | ||||
|             .unwrap() | ||||
|     } | ||||
|  | ||||
|     /// Compares two weighted scores value by value. | ||||
|     /// | ||||
|     /// Both sequences of score values are walked in lockstep and the first pair that | ||||
|     /// differs decides the result: | ||||
|     /// - when one side runs out of values first, that side is `Less`; | ||||
|     /// - two `Score` values are multiplied by their respective weights and compared; | ||||
|     ///   they count as equal when they differ by at most `f64::EPSILON`; | ||||
|     /// - two `Sort` (or two `GeoSort`) values are compared with `partial_cmp`, falling | ||||
|     ///   back to the weighted global score when they are not comparable; | ||||
|     /// - values of different kinds are not compared: the side with more remaining values | ||||
|     ///   is `Greater`, ties being broken by the weighted global score. | ||||
|     /// | ||||
|     /// # Panics | ||||
|     /// | ||||
|     /// Panics if a weighted score turns out to be NaN (see the `unwrap` calls). | ||||
|     pub fn compare(&self, other: &Self) -> Ordering { | ||||
|         let mut left_it = ScoreDetails::score_values(self.details.iter()); | ||||
|         let mut right_it = ScoreDetails::score_values(other.details.iter()); | ||||
|  | ||||
|         loop { | ||||
|             let left = left_it.next(); | ||||
|             let right = right_it.next(); | ||||
|  | ||||
|             match (left, right) { | ||||
|                 // Both sequences exhausted without finding a difference. | ||||
|                 (None, None) => return Ordering::Equal, | ||||
|                 (None, Some(_)) => return Ordering::Less, | ||||
|                 (Some(_), None) => return Ordering::Greater, | ||||
|                 (Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => { | ||||
|                     let left = left * self.weight; | ||||
|                     let right = right * other.weight; | ||||
|                     // Treat near-identical weighted scores as a tie and look at the next values. | ||||
|                     if (left - right).abs() <= f64::EPSILON { | ||||
|                         continue; | ||||
|                     } | ||||
|                     return left.partial_cmp(&right).unwrap(); | ||||
|                 } | ||||
|                 (Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => { | ||||
|                     match left.partial_cmp(right) { | ||||
|                         Some(Ordering::Equal) => continue, | ||||
|                         Some(order) => return order, | ||||
|                         // Incomparable sort values: fall back to the weighted global score. | ||||
|                         None => return self.compare_weighted_global_scores(other), | ||||
|                     } | ||||
|                 } | ||||
|                 (Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => { | ||||
|                     match left.partial_cmp(right) { | ||||
|                         Some(Ordering::Equal) => continue, | ||||
|                         Some(order) => return order, | ||||
|                         // Incomparable geo sort values: fall back to the weighted global score. | ||||
|                         None => { | ||||
|                             return self.compare_weighted_global_scores(other); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 // not comparable details, use global | ||||
|                 (Some(ScoreValue::Score(_)), Some(_)) | ||||
|                 | (Some(_), Some(ScoreValue::Score(_))) | ||||
|                 | (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) | ||||
|                 | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { | ||||
|                     // `count()` consumes both iterators; this arm always returns. | ||||
|                     let left_count = left_it.count(); | ||||
|                     let right_count = right_it.count(); | ||||
|                     // compare how many remaining groups of rules each side has. | ||||
|                     // the group with the most remaining groups wins. | ||||
|                     return left_count | ||||
|                         .cmp(&right_count) | ||||
|                         // breaks ties with the global ranking score | ||||
|                         .then_with(|| self.compare_weighted_global_scores(other)); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
/// One query of a federated search, as stored after the queries have been partitioned by index.
struct QueryByIndex {
    /// The search query, separated from its index uid and federation options.
    query: SearchQuery,
    /// Federation options attached to the query; the default options are used when
    /// the query did not specify any.
    federation_options: FederationOptions,
    /// Position of the query in the list of queries received by `perform_federated_search`.
    query_index: usize,
}
|  | ||||
/// The outcome of a single query executed against its index, before any hit is built.
struct SearchResultByQuery<'a> {
    /// Ids of the documents returned by the search.
    documents_ids: Vec<DocumentId>,
    /// Score details of the returned documents; paired element-wise with `documents_ids`
    /// when the result is turned into an iterator.
    document_scores: Vec<Vec<ScoreDetails>>,
    /// Federation options of the query that produced this result.
    federation_options: FederationOptions,
    /// Hit maker configured with the formatting options of the query.
    hit_maker: HitMaker<'a>,
    /// Position of the originating query in the list of queries of the request.
    query_index: usize,
}
|  | ||||
/// Iterator over the documents of a [`SearchResultByQuery`].
///
/// Each yielded item carries the query-level data (federation options, hit maker, query index)
/// alongside the document id and its score.
struct SearchResultByQueryIter<'a> {
    /// Document ids zipped with their score details.
    it: Zip<IntoIter<DocumentId>, IntoIter<Vec<ScoreDetails>>>,
    /// Federation options of the originating query, copied into every item.
    federation_options: FederationOptions,
    /// Hit maker of the originating query, shared by reference counting with every item.
    hit_maker: Rc<HitMaker<'a>>,
    /// Position of the originating query in the list of queries of the request.
    query_index: usize,
}
|  | ||||
| impl<'a> SearchResultByQueryIter<'a> { | ||||
|     fn new( | ||||
|         SearchResultByQuery { | ||||
|             documents_ids, | ||||
|             document_scores, | ||||
|             federation_options, | ||||
|             hit_maker, | ||||
|             query_index, | ||||
|         }: SearchResultByQuery<'a>, | ||||
|     ) -> Self { | ||||
|         let it = documents_ids.into_iter().zip(document_scores); | ||||
|         Self { it, federation_options, hit_maker: Rc::new(hit_maker), query_index } | ||||
|     } | ||||
| } | ||||
|  | ||||
/// A document yielded by [`SearchResultByQueryIter`], bundled with the data of its query.
struct SearchResultByQueryIterItem<'a> {
    /// Id of the document in its index.
    docid: DocumentId,
    /// Score details of the document for the originating query.
    score: Vec<ScoreDetails>,
    /// Federation options of the originating query.
    federation_options: FederationOptions,
    /// Shared handle to the hit maker of the originating query.
    hit_maker: Rc<HitMaker<'a>>,
    /// Position of the originating query in the list of queries of the request.
    query_index: usize,
}
|  | ||||
| fn merge_index_local_results( | ||||
|     results_by_query: Vec<SearchResultByQuery<'_>>, | ||||
| ) -> impl Iterator<Item = SearchResultByQueryIterItem> + '_ { | ||||
|     itertools::kmerge_by( | ||||
|         results_by_query.into_iter().map(SearchResultByQueryIter::new), | ||||
|         |left: &SearchResultByQueryIterItem, right: &SearchResultByQueryIterItem| { | ||||
|             let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); | ||||
|             let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); | ||||
|  | ||||
|             match left_score.compare(&right_score) { | ||||
|                 // the biggest score goes first | ||||
|                 Ordering::Greater => true, | ||||
|                 // break ties using query index | ||||
|                 Ordering::Equal => left.query_index < right.query_index, | ||||
|                 Ordering::Less => false, | ||||
|             } | ||||
|         }, | ||||
|     ) | ||||
| } | ||||
|  | ||||
| fn merge_index_global_results( | ||||
|     results_by_index: Vec<SearchResultByIndex>, | ||||
| ) -> impl Iterator<Item = SearchHitByIndex> { | ||||
|     itertools::kmerge_by( | ||||
|         results_by_index.into_iter().map(|result_by_index| result_by_index.hits.into_iter()), | ||||
|         |left: &SearchHitByIndex, right: &SearchHitByIndex| { | ||||
|             let left_score = WeightedScore::new(&left.score, *left.federation_options.weight); | ||||
|             let right_score = WeightedScore::new(&right.score, *right.federation_options.weight); | ||||
|  | ||||
|             match left_score.compare(&right_score) { | ||||
|                 // the biggest score goes first | ||||
|                 Ordering::Greater => true, | ||||
|                 // break ties using query index | ||||
|                 Ordering::Equal => left.query_index < right.query_index, | ||||
|                 Ordering::Less => false, | ||||
|             } | ||||
|         }, | ||||
|     ) | ||||
| } | ||||
|  | ||||
| impl<'a> Iterator for SearchResultByQueryIter<'a> { | ||||
|     type Item = SearchResultByQueryIterItem<'a>; | ||||
|  | ||||
|     fn next(&mut self) -> Option<Self::Item> { | ||||
|         let (docid, score) = self.it.next()?; | ||||
|         Some(SearchResultByQueryIterItem { | ||||
|             docid, | ||||
|             score, | ||||
|             federation_options: self.federation_options, | ||||
|             hit_maker: Rc::clone(&self.hit_maker), | ||||
|             query_index: self.query_index, | ||||
|         }) | ||||
|     } | ||||
| } | ||||
|  | ||||
/// A fully built hit, kept together with the data needed to merge it with hits of other indexes.
struct SearchHitByIndex {
    /// The hit; `perform_federated_search` inserts a `_federation` entry in its document.
    hit: SearchHit,
    /// Score details of the document, used to compare hits during the merge.
    score: Vec<ScoreDetails>,
    /// Federation options of the query that produced the hit (provides the weight).
    federation_options: FederationOptions,
    /// Position of the originating query in the list of queries of the request.
    query_index: usize,
}
|  | ||||
/// The merged result of all the queries that target one index.
struct SearchResultByIndex {
    /// Hits of the index after merging its queries and removing duplicate documents.
    hits: Vec<SearchHitByIndex>,
    /// Union of the candidates of every query executed on the index.
    candidates: RoaringBitmap,
    /// `true` when at least one query of the index reported a degraded search.
    degraded: bool,
    /// `true` when at least one query of the index reported using a negative operator.
    used_negative_operator: bool,
}
|  | ||||
| pub fn perform_federated_search( | ||||
|     index_scheduler: &IndexScheduler, | ||||
|     queries: Vec<SearchQueryWithIndex>, | ||||
|     federation: Federation, | ||||
|     features: RoFeatures, | ||||
| ) -> Result<FederatedSearchResult, ResponseError> { | ||||
|     let before_search = std::time::Instant::now(); | ||||
|  | ||||
|     // this implementation partition the queries by index to guarantee an important property: | ||||
|     // - all the queries to a particular index use the same read transaction. | ||||
|     // This is an important property, otherwise we cannot guarantee the self-consistency of the results. | ||||
|  | ||||
|     // 1. partition queries by index | ||||
|     let mut queries_by_index: BTreeMap<String, Vec<QueryByIndex>> = Default::default(); | ||||
|     for (query_index, federated_query) in queries.into_iter().enumerate() { | ||||
|         if let Some(pagination_field) = federated_query.has_pagination() { | ||||
|             return Err(MeilisearchHttpError::PaginationInFederatedQuery( | ||||
|                 query_index, | ||||
|                 pagination_field, | ||||
|             ) | ||||
|             .into()); | ||||
|         } | ||||
|  | ||||
|         let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); | ||||
|  | ||||
|         queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { | ||||
|             query, | ||||
|             federation_options: federation_options.unwrap_or_default(), | ||||
|             query_index, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     // 2. perform queries, merge and make hits index by index | ||||
|     let required_hit_count = federation.limit + federation.offset; | ||||
|     // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic | ||||
|     // Then in step (3), we'll update its value if there is any semantic search | ||||
|     let mut semantic_hit_count = None; | ||||
|     let mut results_by_index = Vec::with_capacity(queries_by_index.len()); | ||||
|     let mut previous_query_data: Option<(RankingRules, usize, String)> = None; | ||||
|  | ||||
|     for (index_uid, queries) in queries_by_index { | ||||
|         let index = match index_scheduler.index(&index_uid) { | ||||
|             Ok(index) => index, | ||||
|             Err(err) => { | ||||
|                 let mut err = ResponseError::from(err); | ||||
|                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||
|                 // here the resource not found is not part of the URL. | ||||
|                 err.code = StatusCode::BAD_REQUEST; | ||||
|                 if let Some(query) = queries.first() { | ||||
|                     err.message = | ||||
|                         format!("Inside `.queries[{}]`: {}", query.query_index, err.message); | ||||
|                 } | ||||
|                 return Err(err); | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         // Important: this is the only transaction we'll use for this index during this federated search | ||||
|         let rtxn = index.read_txn()?; | ||||
|  | ||||
|         let criteria = index.criteria(&rtxn)?; | ||||
|  | ||||
|         // stuff we need for the hitmaker | ||||
|         let script_lang_map = index.script_language(&rtxn)?; | ||||
|  | ||||
|         let dictionary = index.dictionary(&rtxn)?; | ||||
|         let dictionary: Option<Vec<_>> = | ||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         let separators = index.allowed_separators(&rtxn)?; | ||||
|         let separators: Option<Vec<_>> = | ||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|  | ||||
|         // each query gets its individual cutoff | ||||
|         let cutoff = index.search_cutoff(&rtxn)?; | ||||
|  | ||||
|         let mut degraded = false; | ||||
|         let mut used_negative_operator = false; | ||||
|         let mut candidates = RoaringBitmap::new(); | ||||
|  | ||||
|         // 2.1. Compute all candidates for each query in the index | ||||
|         let mut results_by_query = Vec::with_capacity(queries.len()); | ||||
|  | ||||
|         for QueryByIndex { query, federation_options, query_index } in queries { | ||||
|             // use an immediately invoked lambda to capture the result without returning from the function | ||||
|  | ||||
|             let res: Result<(), ResponseError> = (|| { | ||||
|                 let search_kind = search_kind(&query, index_scheduler, &index, features)?; | ||||
|  | ||||
|                 let canonicalization_kind = match (&search_kind, &query.q) { | ||||
|                     (SearchKind::SemanticOnly { .. }, _) => { | ||||
|                         ranking_rules::CanonicalizationKind::Vector | ||||
|                     } | ||||
|                     (_, Some(q)) if !q.is_empty() => ranking_rules::CanonicalizationKind::Keyword, | ||||
|                     _ => ranking_rules::CanonicalizationKind::Placeholder, | ||||
|                 }; | ||||
|  | ||||
|                 let sort = if let Some(sort) = &query.sort { | ||||
|                     let sorts: Vec<_> = | ||||
|                         match sort.iter().map(|s| milli::AscDesc::from_str(s)).collect() { | ||||
|                             Ok(sorts) => sorts, | ||||
|                             Err(asc_desc_error) => { | ||||
|                                 return Err(milli::Error::from(milli::SortError::from( | ||||
|                                     asc_desc_error, | ||||
|                                 )) | ||||
|                                 .into()) | ||||
|                             } | ||||
|                         }; | ||||
|                     Some(sorts) | ||||
|                 } else { | ||||
|                     None | ||||
|                 }; | ||||
|  | ||||
|                 let ranking_rules = ranking_rules::RankingRules::new( | ||||
|                     criteria.clone(), | ||||
|                     sort, | ||||
|                     query.matching_strategy.into(), | ||||
|                     canonicalization_kind, | ||||
|                 ); | ||||
|  | ||||
|                 if let Some((previous_ranking_rules, previous_query_index, previous_index_uid)) = | ||||
|                     previous_query_data.take() | ||||
|                 { | ||||
|                     if let Err(error) = ranking_rules.is_compatible_with(&previous_ranking_rules) { | ||||
|                         return Err(error.to_response_error( | ||||
|                             &ranking_rules, | ||||
|                             &previous_ranking_rules, | ||||
|                             query_index, | ||||
|                             previous_query_index, | ||||
|                             &index_uid, | ||||
|                             &previous_index_uid, | ||||
|                         )); | ||||
|                     } | ||||
|                     previous_query_data = if previous_ranking_rules.constraint_count() | ||||
|                         > ranking_rules.constraint_count() | ||||
|                     { | ||||
|                         Some((previous_ranking_rules, previous_query_index, previous_index_uid)) | ||||
|                     } else { | ||||
|                         Some((ranking_rules, query_index, index_uid.clone())) | ||||
|                     }; | ||||
|                 } else { | ||||
|                     previous_query_data = Some((ranking_rules, query_index, index_uid.clone())); | ||||
|                 } | ||||
|  | ||||
|                 match search_kind { | ||||
|                     SearchKind::KeywordOnly => {} | ||||
|                     _ => semantic_hit_count = Some(0), | ||||
|                 } | ||||
|  | ||||
|                 let retrieve_vectors = RetrieveVectors::new(query.retrieve_vectors, features)?; | ||||
|  | ||||
|                 let time_budget = match cutoff { | ||||
|                     Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), | ||||
|                     None => TimeBudget::default(), | ||||
|                 }; | ||||
|  | ||||
|                 let (mut search, _is_finite_pagination, _max_total_hits, _offset) = | ||||
|                     prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?; | ||||
|  | ||||
|                 search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); | ||||
|                 search.offset(0); | ||||
|                 search.limit(required_hit_count); | ||||
|  | ||||
|                 let (result, _semantic_hit_count) = super::search_from_kind(search_kind, search)?; | ||||
|                 let format = AttributesFormat { | ||||
|                     attributes_to_retrieve: query.attributes_to_retrieve, | ||||
|                     retrieve_vectors, | ||||
|                     attributes_to_highlight: query.attributes_to_highlight, | ||||
|                     attributes_to_crop: query.attributes_to_crop, | ||||
|                     crop_length: query.crop_length, | ||||
|                     crop_marker: query.crop_marker, | ||||
|                     highlight_pre_tag: query.highlight_pre_tag, | ||||
|                     highlight_post_tag: query.highlight_post_tag, | ||||
|                     show_matches_position: query.show_matches_position, | ||||
|                     sort: query.sort, | ||||
|                     show_ranking_score: query.show_ranking_score, | ||||
|                     show_ranking_score_details: query.show_ranking_score_details, | ||||
|                 }; | ||||
|  | ||||
|                 let milli::SearchResult { | ||||
|                     matching_words, | ||||
|                     candidates: query_candidates, | ||||
|                     documents_ids, | ||||
|                     document_scores, | ||||
|                     degraded: query_degraded, | ||||
|                     used_negative_operator: query_used_negative_operator, | ||||
|                 } = result; | ||||
|  | ||||
|                 candidates |= query_candidates; | ||||
|                 degraded |= query_degraded; | ||||
|                 used_negative_operator |= query_used_negative_operator; | ||||
|  | ||||
|                 let tokenizer = HitMaker::tokenizer( | ||||
|                     &script_lang_map, | ||||
|                     dictionary.as_deref(), | ||||
|                     separators.as_deref(), | ||||
|                 ); | ||||
|  | ||||
|                 let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); | ||||
|  | ||||
|                 let hit_maker = HitMaker::new(&index, &rtxn, format, formatter_builder)?; | ||||
|  | ||||
|                 results_by_query.push(SearchResultByQuery { | ||||
|                     federation_options, | ||||
|                     hit_maker, | ||||
|                     query_index, | ||||
|                     documents_ids, | ||||
|                     document_scores, | ||||
|                 }); | ||||
|                 Ok(()) | ||||
|             })(); | ||||
|  | ||||
|             if let Err(mut error) = res { | ||||
|                 error.message = format!("Inside `.queries[{query_index}]`: {}", error.message); | ||||
|                 return Err(error); | ||||
|             } | ||||
|         } | ||||
|         // 2.2. merge inside index | ||||
|         let mut documents_seen = RoaringBitmap::new(); | ||||
|         let merged_result: Result<Vec<_>, ResponseError> = | ||||
|             merge_index_local_results(results_by_query) | ||||
|                 // skip documents we've already seen & mark that we saw the current document | ||||
|                 .filter(|SearchResultByQueryIterItem { docid, .. }| documents_seen.insert(*docid)) | ||||
|                 .take(required_hit_count) | ||||
|                 // 2.3 make hits | ||||
|                 .map( | ||||
|                     |SearchResultByQueryIterItem { | ||||
|                          docid, | ||||
|                          score, | ||||
|                          federation_options, | ||||
|                          hit_maker, | ||||
|                          query_index, | ||||
|                      }| { | ||||
|                         let mut hit = hit_maker.make_hit(docid, &score)?; | ||||
|                         let weighted_score = | ||||
|                             ScoreDetails::global_score(score.iter()) * (*federation_options.weight); | ||||
|  | ||||
|                         let _federation = serde_json::json!( | ||||
|                             { | ||||
|                                 "indexUid": index_uid, | ||||
|                                 "queriesPosition": query_index, | ||||
|                                 "weightedRankingScore": weighted_score, | ||||
|                             } | ||||
|                         ); | ||||
|                         hit.document.insert("_federation".to_string(), _federation); | ||||
|                         Ok(SearchHitByIndex { hit, score, federation_options, query_index }) | ||||
|                     }, | ||||
|                 ) | ||||
|                 .collect(); | ||||
|  | ||||
|         let merged_result = merged_result?; | ||||
|         results_by_index.push(SearchResultByIndex { | ||||
|             hits: merged_result, | ||||
|             candidates, | ||||
|             degraded, | ||||
|             used_negative_operator, | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     // 3. merge hits and metadata across indexes | ||||
|     // 3.1 merge metadata | ||||
|     let (estimated_total_hits, degraded, used_negative_operator) = { | ||||
|         let mut estimated_total_hits = 0; | ||||
|         let mut degraded = false; | ||||
|         let mut used_negative_operator = false; | ||||
|  | ||||
|         for SearchResultByIndex { | ||||
|             hits: _, | ||||
|             candidates, | ||||
|             degraded: degraded_by_index, | ||||
|             used_negative_operator: used_negative_operator_by_index, | ||||
|         } in &results_by_index | ||||
|         { | ||||
|             estimated_total_hits += candidates.len() as usize; | ||||
|             degraded |= *degraded_by_index; | ||||
|             used_negative_operator |= *used_negative_operator_by_index; | ||||
|         } | ||||
|  | ||||
|         (estimated_total_hits, degraded, used_negative_operator) | ||||
|     }; | ||||
|  | ||||
|     // 3.2 merge hits | ||||
|     let merged_hits: Vec<_> = merge_index_global_results(results_by_index) | ||||
|         .skip(federation.offset) | ||||
|         .take(federation.limit) | ||||
|         .inspect(|hit| { | ||||
|             if let Some(semantic_hit_count) = &mut semantic_hit_count { | ||||
|                 if hit.score.iter().any(|score| matches!(&score, ScoreDetails::Vector(_))) { | ||||
|                     *semantic_hit_count += 1; | ||||
|                 } | ||||
|             } | ||||
|         }) | ||||
|         .map(|hit| hit.hit) | ||||
|         .collect(); | ||||
|  | ||||
|     let search_result = FederatedSearchResult { | ||||
|         hits: merged_hits, | ||||
|         processing_time_ms: before_search.elapsed().as_millis(), | ||||
|         hits_info: HitsInfo::OffsetLimit { | ||||
|             limit: federation.limit, | ||||
|             offset: federation.offset, | ||||
|             estimated_total_hits, | ||||
|         }, | ||||
|         semantic_hit_count, | ||||
|         degraded, | ||||
|         used_negative_operator, | ||||
|     }; | ||||
|  | ||||
|     Ok(search_result) | ||||
| } | ||||
| @@ -1,6 +1,6 @@ | ||||
| use core::fmt; | ||||
| use std::cmp::min; | ||||
| use std::collections::{BTreeMap, BTreeSet, HashSet}; | ||||
| use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; | ||||
| use std::str::FromStr; | ||||
| use std::sync::Arc; | ||||
| use std::time::{Duration, Instant}; | ||||
| @@ -31,6 +31,11 @@ use serde_json::{json, Value}; | ||||
| 
 | ||||
| use crate::error::MeilisearchHttpError; | ||||
| 
 | ||||
| mod federated; | ||||
| pub use federated::{perform_federated_search, FederatedSearch, Federation, FederationOptions}; | ||||
| 
 | ||||
| mod ranking_rules; | ||||
| 
 | ||||
| type MatchesPosition = BTreeMap<String, Vec<MatchBounds>>; | ||||
| 
 | ||||
| pub const DEFAULT_SEARCH_OFFSET: fn() -> usize = || 0; | ||||
| @@ -257,11 +262,13 @@ pub struct HybridQuery { | ||||
|     pub embedder: Option<String>, | ||||
| } | ||||
| 
 | ||||
/// How a search must be executed (see `search_from_kind`).
#[derive(Clone)]
pub enum SearchKind {
    /// Executed with `Search::execute`; no semantic hit count is reported.
    KeywordOnly,
    /// Executed with `Search::execute`; every returned document counts as a semantic hit.
    SemanticOnly { embedder_name: String, embedder: Arc<Embedder> },
    /// Executed with `Search::execute_hybrid`, which receives `semantic_ratio`.
    Hybrid { embedder_name: String, embedder: Arc<Embedder>, semantic_ratio: f32 },
}
| 
 | ||||
| impl SearchKind { | ||||
|     pub(crate) fn semantic( | ||||
|         index_scheduler: &index_scheduler::IndexScheduler, | ||||
| @@ -358,7 +365,7 @@ impl SearchQuery { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| /// A `SearchQuery` + an index UID.
 | ||||
| /// A `SearchQuery` + an index UID and optional FederationOptions.
 | ||||
| // This struct contains the fields of `SearchQuery` inline.
 | ||||
| // This is because neither deserr nor serde support `flatten` when using `deny_unknown_fields.
 | ||||
| // The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
 | ||||
| @@ -373,10 +380,10 @@ pub struct SearchQueryWithIndex { | ||||
|     pub vector: Option<Vec<f32>>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidHybridQuery>)] | ||||
|     pub hybrid: Option<HybridQuery>, | ||||
|     #[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] | ||||
|     pub offset: usize, | ||||
|     #[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)] | ||||
|     pub limit: usize, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchOffset>)] | ||||
|     pub offset: Option<usize>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchLimit>)] | ||||
|     pub limit: Option<usize>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchPage>)] | ||||
|     pub page: Option<usize>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)] | ||||
| @@ -417,12 +424,33 @@ pub struct SearchQueryWithIndex { | ||||
|     pub attributes_to_search_on: Option<Vec<String>>, | ||||
|     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)] | ||||
|     pub ranking_score_threshold: Option<RankingScoreThreshold>, | ||||
| 
 | ||||
|     #[deserr(default)] | ||||
|     pub federation_options: Option<FederationOptions>, | ||||
| } | ||||
| 
 | ||||
| impl SearchQueryWithIndex { | ||||
|     pub fn into_index_query(self) -> (IndexUid, SearchQuery) { | ||||
    /// Returns `true` when the query carries federation options.
    pub fn has_federation_options(&self) -> bool {
        self.federation_options.is_some()
    }
|     pub fn has_pagination(&self) -> Option<&'static str> { | ||||
|         if self.offset.is_some() { | ||||
|             Some("offset") | ||||
|         } else if self.limit.is_some() { | ||||
|             Some("limit") | ||||
|         } else if self.page.is_some() { | ||||
|             Some("page") | ||||
|         } else if self.hits_per_page.is_some() { | ||||
|             Some("hitsPerPage") | ||||
|         } else { | ||||
|             None | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { | ||||
|         let SearchQueryWithIndex { | ||||
|             index_uid, | ||||
|             federation_options, | ||||
|             q, | ||||
|             vector, | ||||
|             offset, | ||||
| @@ -454,8 +482,8 @@ impl SearchQueryWithIndex { | ||||
|             SearchQuery { | ||||
|                 q, | ||||
|                 vector, | ||||
|                 offset, | ||||
|                 limit, | ||||
|                 offset: offset.unwrap_or(DEFAULT_SEARCH_OFFSET()), | ||||
|                 limit: limit.unwrap_or(DEFAULT_SEARCH_LIMIT()), | ||||
|                 page, | ||||
|                 hits_per_page, | ||||
|                 attributes_to_retrieve, | ||||
| @@ -480,6 +508,7 @@ impl SearchQueryWithIndex { | ||||
|                 // do not use ..Default::default() here,
 | ||||
|                 // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
 | ||||
|             }, | ||||
|             federation_options, | ||||
|         ) | ||||
|     } | ||||
| } | ||||
| @@ -864,15 +893,7 @@ pub fn perform_search( | ||||
|             used_negative_operator, | ||||
|         }, | ||||
|         semantic_hit_count, | ||||
|     ) = match &search_kind { | ||||
|         SearchKind::KeywordOnly => (search.execute()?, None), | ||||
|         SearchKind::SemanticOnly { .. } => { | ||||
|             let results = search.execute()?; | ||||
|             let semantic_hit_count = results.document_scores.len() as u32; | ||||
|             (results, Some(semantic_hit_count)) | ||||
|         } | ||||
|         SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?, | ||||
|     }; | ||||
|     ) = search_from_kind(search_kind, search)?; | ||||
| 
 | ||||
|     let SearchQuery { | ||||
|         q, | ||||
| @@ -919,8 +940,13 @@ pub fn perform_search( | ||||
|         show_ranking_score_details, | ||||
|     }; | ||||
| 
 | ||||
|     let documents = | ||||
|         make_hits(index, &rtxn, format, matching_words, documents_ids, document_scores)?; | ||||
|     let documents = make_hits( | ||||
|         index, | ||||
|         &rtxn, | ||||
|         format, | ||||
|         matching_words, | ||||
|         documents_ids.iter().copied().zip(document_scores.iter()), | ||||
|     )?; | ||||
| 
 | ||||
|     let number_of_hits = min(candidates.len() as usize, max_total_hits); | ||||
|     let hits_info = if is_finite_pagination { | ||||
| @@ -988,6 +1014,22 @@ pub fn perform_search( | ||||
|     Ok(result) | ||||
| } | ||||
| 
 | ||||
| pub fn search_from_kind( | ||||
|     search_kind: SearchKind, | ||||
|     search: milli::Search<'_>, | ||||
| ) -> Result<(milli::SearchResult, Option<u32>), MeilisearchHttpError> { | ||||
|     let (milli_result, semantic_hit_count) = match &search_kind { | ||||
|         SearchKind::KeywordOnly => (search.execute()?, None), | ||||
|         SearchKind::SemanticOnly { .. } => { | ||||
|             let results = search.execute()?; | ||||
|             let semantic_hit_count = results.document_scores.len() as u32; | ||||
|             (results, Some(semantic_hit_count)) | ||||
|         } | ||||
|         SearchKind::Hybrid { semantic_ratio, .. } => search.execute_hybrid(*semantic_ratio)?, | ||||
|     }; | ||||
|     Ok((milli_result, semantic_hit_count)) | ||||
| } | ||||
| 
 | ||||
| struct AttributesFormat { | ||||
|     attributes_to_retrieve: Option<BTreeSet<String>>, | ||||
|     retrieve_vectors: RetrieveVectors, | ||||
| @@ -1033,131 +1075,191 @@ impl RetrieveVectors { | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn make_hits( | ||||
|     index: &Index, | ||||
|     rtxn: &RoTxn<'_>, | ||||
|     format: AttributesFormat, | ||||
|     matching_words: milli::MatchingWords, | ||||
|     documents_ids: Vec<u32>, | ||||
|     document_scores: Vec<Vec<ScoreDetails>>, | ||||
| ) -> Result<Vec<SearchHit>, MeilisearchHttpError> { | ||||
|     let fields_ids_map = index.fields_ids_map(rtxn).unwrap(); | ||||
|     let displayed_ids = | ||||
|         index.displayed_fields_ids(rtxn)?.map(|fields| fields.into_iter().collect::<BTreeSet<_>>()); | ||||
/// Builds search hits for the documents of one index, using one read transaction and the
/// formatting options of one query.
struct HitMaker<'a> {
    /// Index the documents are read from.
    index: &'a Index,
    /// Read transaction used for every access to the index.
    rtxn: &'a RoTxn<'a>,
    /// Fields ids map of the index, read once in `new`.
    fields_ids_map: FieldsIdsMap,
    /// Ids of the displayed fields; all the fields of the fields ids map when the index
    /// does not restrict its displayed fields.
    displayed_ids: BTreeSet<FieldId>,
    /// Id of the reserved vectors field, when it exists in the fields ids map.
    vectors_fid: Option<FieldId>,
    // NOTE(review): appears to be derived from `AttributesFormat::retrieve_vectors` and
    // the visibility of the vectors field; confirm in `new`.
    retrieve_vectors: RetrieveVectors,
    // NOTE(review): presumably the ids of the fields to return in hits; confirm in `new`.
    to_retrieve_ids: BTreeSet<FieldId>,
    embedding_configs: Vec<milli::index::IndexEmbeddingConfig>,
    /// Matcher builder configured in `new` with the crop marker and highlight tags.
    formatter_builder: MatcherBuilder<'a>,
    // NOTE(review): presumably the per-field crop/highlight options; confirm in `new`.
    formatted_options: BTreeMap<FieldId, FormatOptions>,
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    sort: Option<Vec<String>>,
    show_matches_position: bool,
}
| 
 | ||||
|     let vectors_fid = fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); | ||||
| impl<'a> HitMaker<'a> { | ||||
|     pub fn tokenizer<'b>( | ||||
|         script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>, | ||||
|         dictionary: Option<&'b [&'b str]>, | ||||
|         separators: Option<&'b [&'b str]>, | ||||
|     ) -> milli::tokenizer::Tokenizer<'b> { | ||||
|         let mut tokenizer_builder = TokenizerBuilder::default(); | ||||
|         tokenizer_builder.create_char_map(true); | ||||
|         if !script_lang_map.is_empty() { | ||||
|             tokenizer_builder.allow_list(script_lang_map); | ||||
|         } | ||||
| 
 | ||||
|     let vectors_is_hidden = match (&displayed_ids, vectors_fid) { | ||||
|         // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
 | ||||
|         (None, _) => false, | ||||
|         // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
 | ||||
|         (Some(_), None) => true, | ||||
|         // displayed_ids is a finit list, so hide if `_vectors` is not part of it
 | ||||
|         (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), | ||||
|     }; | ||||
|         if let Some(separators) = separators { | ||||
|             tokenizer_builder.separators(separators); | ||||
|         } | ||||
| 
 | ||||
|     let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors { | ||||
|         if vectors_is_hidden { | ||||
|             RetrieveVectors::Hide | ||||
|         if let Some(dictionary) = dictionary { | ||||
|             tokenizer_builder.words_dict(dictionary); | ||||
|         } | ||||
| 
 | ||||
|         tokenizer_builder.into_tokenizer() | ||||
|     } | ||||
| 
 | ||||
|     pub fn formatter_builder( | ||||
|         matching_words: milli::MatchingWords, | ||||
|         tokenizer: milli::tokenizer::Tokenizer<'_>, | ||||
|     ) -> MatcherBuilder<'_> { | ||||
|         let formatter_builder = MatcherBuilder::new(matching_words, tokenizer); | ||||
| 
 | ||||
|         formatter_builder | ||||
|     } | ||||
| 
 | ||||
|     pub fn new( | ||||
|         index: &'a Index, | ||||
|         rtxn: &'a RoTxn<'a>, | ||||
|         format: AttributesFormat, | ||||
|         mut formatter_builder: MatcherBuilder<'a>, | ||||
|     ) -> Result<Self, MeilisearchHttpError> { | ||||
|         formatter_builder.crop_marker(format.crop_marker); | ||||
|         formatter_builder.highlight_prefix(format.highlight_pre_tag); | ||||
|         formatter_builder.highlight_suffix(format.highlight_post_tag); | ||||
| 
 | ||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||
|         let displayed_ids = index | ||||
|             .displayed_fields_ids(rtxn)? | ||||
|             .map(|fields| fields.into_iter().collect::<BTreeSet<_>>()); | ||||
| 
 | ||||
|         let vectors_fid = | ||||
|             fields_ids_map.id(milli::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME); | ||||
| 
 | ||||
|         let vectors_is_hidden = match (&displayed_ids, vectors_fid) { | ||||
|             // displayed_ids is a wildcard, so `_vectors` can be displayed regardless of its fid
 | ||||
|             (None, _) => false, | ||||
|             // displayed_ids is a finite list, and `_vectors` cannot be part of it because it is not an existing field
 | ||||
|             (Some(_), None) => true, | ||||
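|             // displayed_ids is a finite list, so hide if `_vectors` is not part of it (NOTE(review): the arm below is `true` when `_vectors` IS part of the list; confirm whether a negation is missing)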
 | ||||
|             (Some(map), Some(vectors_fid)) => map.contains(&vectors_fid), | ||||
|         }; | ||||
| 
 | ||||
|         let displayed_ids = | ||||
|             displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); | ||||
| 
 | ||||
|         let retrieve_vectors = if let RetrieveVectors::Retrieve = format.retrieve_vectors { | ||||
|             if vectors_is_hidden { | ||||
|                 RetrieveVectors::Hide | ||||
|             } else { | ||||
|                 RetrieveVectors::Retrieve | ||||
|             } | ||||
|         } else { | ||||
|             RetrieveVectors::Retrieve | ||||
|         } | ||||
|     } else { | ||||
|         format.retrieve_vectors | ||||
|     }; | ||||
|             format.retrieve_vectors | ||||
|         }; | ||||
| 
 | ||||
|     let displayed_ids = | ||||
|         displayed_ids.unwrap_or_else(|| fields_ids_map.iter().map(|(id, _)| id).collect()); | ||||
|     let fids = |attrs: &BTreeSet<String>| { | ||||
|         let mut ids = BTreeSet::new(); | ||||
|         for attr in attrs { | ||||
|             if attr == "*" { | ||||
|                 ids.clone_from(&displayed_ids); | ||||
|                 break; | ||||
|         let fids = |attrs: &BTreeSet<String>| { | ||||
|             let mut ids = BTreeSet::new(); | ||||
|             for attr in attrs { | ||||
|                 if attr == "*" { | ||||
|                     ids.clone_from(&displayed_ids); | ||||
|                     break; | ||||
|                 } | ||||
| 
 | ||||
|                 if let Some(id) = fields_ids_map.id(attr) { | ||||
|                     ids.insert(id); | ||||
|                 } | ||||
|             } | ||||
|             ids | ||||
|         }; | ||||
|         let to_retrieve_ids: BTreeSet<_> = format | ||||
|             .attributes_to_retrieve | ||||
|             .as_ref() | ||||
|             .map(fids) | ||||
|             .unwrap_or_else(|| displayed_ids.clone()) | ||||
|             .intersection(&displayed_ids) | ||||
|             .cloned() | ||||
|             .collect(); | ||||
| 
 | ||||
|             if let Some(id) = fields_ids_map.id(attr) { | ||||
|                 ids.insert(id); | ||||
|             } | ||||
|         } | ||||
|         ids | ||||
|     }; | ||||
|     let to_retrieve_ids: BTreeSet<_> = format | ||||
|         .attributes_to_retrieve | ||||
|         .as_ref() | ||||
|         .map(fids) | ||||
|         .unwrap_or_else(|| displayed_ids.clone()) | ||||
|         .intersection(&displayed_ids) | ||||
|         .cloned() | ||||
|         .collect(); | ||||
|         let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); | ||||
|         let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); | ||||
|         let formatted_options = compute_formatted_options( | ||||
|             &attr_to_highlight, | ||||
|             &attr_to_crop, | ||||
|             format.crop_length, | ||||
|             &to_retrieve_ids, | ||||
|             &fields_ids_map, | ||||
|             &displayed_ids, | ||||
|         ); | ||||
| 
 | ||||
|     let attr_to_highlight = format.attributes_to_highlight.unwrap_or_default(); | ||||
|     let attr_to_crop = format.attributes_to_crop.unwrap_or_default(); | ||||
|     let formatted_options = compute_formatted_options( | ||||
|         &attr_to_highlight, | ||||
|         &attr_to_crop, | ||||
|         format.crop_length, | ||||
|         &to_retrieve_ids, | ||||
|         &fields_ids_map, | ||||
|         &displayed_ids, | ||||
|     ); | ||||
|     let mut tokenizer_builder = TokenizerBuilder::default(); | ||||
|     tokenizer_builder.create_char_map(true); | ||||
|     let script_lang_map = index.script_language(rtxn)?; | ||||
|     if !script_lang_map.is_empty() { | ||||
|         tokenizer_builder.allow_list(&script_lang_map); | ||||
|         let embedding_configs = index.embedding_configs(rtxn)?; | ||||
| 
 | ||||
|         Ok(Self { | ||||
|             index, | ||||
|             rtxn, | ||||
|             fields_ids_map, | ||||
|             displayed_ids, | ||||
|             vectors_fid, | ||||
|             retrieve_vectors, | ||||
|             to_retrieve_ids, | ||||
|             embedding_configs, | ||||
|             formatter_builder, | ||||
|             formatted_options, | ||||
|             show_ranking_score: format.show_ranking_score, | ||||
|             show_ranking_score_details: format.show_ranking_score_details, | ||||
|             show_matches_position: format.show_matches_position, | ||||
|             sort: format.sort, | ||||
|         }) | ||||
|     } | ||||
|     let separators = index.allowed_separators(rtxn)?; | ||||
|     let separators: Option<Vec<_>> = | ||||
|         separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|     if let Some(ref separators) = separators { | ||||
|         tokenizer_builder.separators(separators); | ||||
|     } | ||||
|     let dictionary = index.dictionary(rtxn)?; | ||||
|     let dictionary: Option<Vec<_>> = | ||||
|         dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|     if let Some(ref dictionary) = dictionary { | ||||
|         tokenizer_builder.words_dict(dictionary); | ||||
|     } | ||||
|     let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build()); | ||||
|     formatter_builder.crop_marker(format.crop_marker); | ||||
|     formatter_builder.highlight_prefix(format.highlight_pre_tag); | ||||
|     formatter_builder.highlight_suffix(format.highlight_post_tag); | ||||
|     let mut documents = Vec::new(); | ||||
|     let embedding_configs = index.embedding_configs(rtxn)?; | ||||
|     let documents_iter = index.documents(rtxn, documents_ids)?; | ||||
|     for ((id, obkv), score) in documents_iter.into_iter().zip(document_scores.into_iter()) { | ||||
| 
 | ||||
|     pub fn make_hit( | ||||
|         &self, | ||||
|         id: u32, | ||||
|         score: &[ScoreDetails], | ||||
|     ) -> Result<SearchHit, MeilisearchHttpError> { | ||||
|         let (_, obkv) = | ||||
|             self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?; | ||||
| 
 | ||||
|         // First generate a document with all the displayed fields
 | ||||
|         let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?; | ||||
|         let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?; | ||||
| 
 | ||||
|         let add_vectors_fid = | ||||
|             vectors_fid.filter(|_fid| retrieve_vectors == RetrieveVectors::Retrieve); | ||||
|             self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve); | ||||
| 
 | ||||
|         // select the attributes to retrieve
 | ||||
|         let attributes_to_retrieve = to_retrieve_ids | ||||
|         let attributes_to_retrieve = self | ||||
|             .to_retrieve_ids | ||||
|             .iter() | ||||
|             // skip the vectors_fid if RetrieveVectors::Hide
 | ||||
|             .filter(|fid| match vectors_fid { | ||||
|             .filter(|fid| match self.vectors_fid { | ||||
|                 Some(vectors_fid) => { | ||||
|                     !(retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid) | ||||
|                     !(self.retrieve_vectors == RetrieveVectors::Hide && **fid == vectors_fid) | ||||
|                 } | ||||
|                 None => true, | ||||
|             }) | ||||
|             // need to retrieve the existing `_vectors` field if the `RetrieveVectors::Retrieve`
 | ||||
|             .chain(add_vectors_fid.iter()) | ||||
|             .map(|&fid| fields_ids_map.name(fid).expect("Missing field name")); | ||||
|             .map(|&fid| self.fields_ids_map.name(fid).expect("Missing field name")); | ||||
| 
 | ||||
|         let mut document = | ||||
|             permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve); | ||||
| 
 | ||||
|         if retrieve_vectors == RetrieveVectors::Retrieve { | ||||
|         if self.retrieve_vectors == RetrieveVectors::Retrieve { | ||||
|             // Clippy is wrong
 | ||||
|             #[allow(clippy::manual_unwrap_or_default)] | ||||
|             let mut vectors = match document.remove("_vectors") { | ||||
|                 Some(Value::Object(map)) => map, | ||||
|                 _ => Default::default(), | ||||
|             }; | ||||
|             for (name, vector) in index.embeddings(rtxn, id)? { | ||||
|                 let user_provided = embedding_configs | ||||
|             for (name, vector) in self.index.embeddings(self.rtxn, id)? { | ||||
|                 let user_provided = self | ||||
|                     .embedding_configs | ||||
|                     .iter() | ||||
|                     .find(|conf| conf.name == name) | ||||
|                     .is_some_and(|conf| conf.user_provided.contains(id)); | ||||
| @@ -1170,21 +1272,21 @@ fn make_hits( | ||||
| 
 | ||||
|         let (matches_position, formatted) = format_fields( | ||||
|             &displayed_document, | ||||
|             &fields_ids_map, | ||||
|             &formatter_builder, | ||||
|             &formatted_options, | ||||
|             format.show_matches_position, | ||||
|             &displayed_ids, | ||||
|             &self.fields_ids_map, | ||||
|             &self.formatter_builder, | ||||
|             &self.formatted_options, | ||||
|             self.show_matches_position, | ||||
|             &self.displayed_ids, | ||||
|         )?; | ||||
| 
 | ||||
|         if let Some(sort) = format.sort.as_ref() { | ||||
|         if let Some(sort) = self.sort.as_ref() { | ||||
|             insert_geo_distance(sort, &mut document); | ||||
|         } | ||||
| 
 | ||||
|         let ranking_score = | ||||
|             format.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); | ||||
|             self.show_ranking_score.then(|| ScoreDetails::global_score(score.iter())); | ||||
|         let ranking_score_details = | ||||
|             format.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter())); | ||||
|             self.show_ranking_score_details.then(|| ScoreDetails::to_json_map(score.iter())); | ||||
| 
 | ||||
|         let hit = SearchHit { | ||||
|             document, | ||||
| @@ -1193,7 +1295,38 @@ fn make_hits( | ||||
|             ranking_score_details, | ||||
|             ranking_score, | ||||
|         }; | ||||
|         documents.push(hit); | ||||
| 
 | ||||
|         Ok(hit) | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| fn make_hits<'a>( | ||||
|     index: &Index, | ||||
|     rtxn: &RoTxn<'_>, | ||||
|     format: AttributesFormat, | ||||
|     matching_words: milli::MatchingWords, | ||||
|     documents_ids_scores: impl Iterator<Item = (u32, &'a Vec<ScoreDetails>)> + 'a, | ||||
| ) -> Result<Vec<SearchHit>, MeilisearchHttpError> { | ||||
|     let mut documents = Vec::new(); | ||||
| 
 | ||||
|     let script_lang_map = index.script_language(rtxn)?; | ||||
| 
 | ||||
|     let dictionary = index.dictionary(rtxn)?; | ||||
|     let dictionary: Option<Vec<_>> = | ||||
|         dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|     let separators = index.allowed_separators(rtxn)?; | ||||
|     let separators: Option<Vec<_>> = | ||||
|         separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
| 
 | ||||
|     let tokenizer = | ||||
|         HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref()); | ||||
| 
 | ||||
|     let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); | ||||
| 
 | ||||
|     let hit_maker = HitMaker::new(index, rtxn, format, formatter_builder)?; | ||||
| 
 | ||||
|     for (id, score) in documents_ids_scores { | ||||
|         documents.push(hit_maker.make_hit(id, score)?); | ||||
|     } | ||||
|     Ok(documents) | ||||
| } | ||||
| @@ -1309,7 +1442,13 @@ pub fn perform_similar( | ||||
|         show_ranking_score_details, | ||||
|     }; | ||||
| 
 | ||||
|     let hits = make_hits(index, &rtxn, format, Default::default(), documents_ids, document_scores)?; | ||||
|     let hits = make_hits( | ||||
|         index, | ||||
|         &rtxn, | ||||
|         format, | ||||
|         Default::default(), | ||||
|         documents_ids.iter().copied().zip(document_scores.iter()), | ||||
|     )?; | ||||
| 
 | ||||
|     let max_total_hits = index | ||||
|         .pagination_max_total_hits(&rtxn) | ||||
| @@ -1482,10 +1621,10 @@ fn make_document( | ||||
|     Ok(document) | ||||
| } | ||||
| 
 | ||||
| fn format_fields<'a>( | ||||
| fn format_fields( | ||||
|     document: &Document, | ||||
|     field_ids_map: &FieldsIdsMap, | ||||
|     builder: &'a MatcherBuilder<'a>, | ||||
|     builder: &MatcherBuilder<'_>, | ||||
|     formatted_options: &BTreeMap<FieldId, FormatOptions>, | ||||
|     compute_matches: bool, | ||||
|     displayable_ids: &BTreeSet<FieldId>, | ||||
| @@ -1540,9 +1679,9 @@ fn format_fields<'a>( | ||||
|     Ok((matches_position, document)) | ||||
| } | ||||
| 
 | ||||
| fn format_value<'a>( | ||||
| fn format_value( | ||||
|     value: Value, | ||||
|     builder: &'a MatcherBuilder<'a>, | ||||
|     builder: &MatcherBuilder<'_>, | ||||
|     format_options: Option<FormatOptions>, | ||||
|     infos: &mut Vec<MatchBounds>, | ||||
|     compute_matches: bool, | ||||
							
								
								
									
										823
									
								
								meilisearch/src/search/ranking_rules.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										823
									
								
								meilisearch/src/search/ranking_rules.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,823 @@ | ||||
| use std::collections::HashMap; | ||||
| use std::fmt::Write; | ||||
|  | ||||
| use itertools::Itertools as _; | ||||
| use meilisearch_types::error::{Code, ResponseError}; | ||||
| use meilisearch_types::milli::{AscDesc, Criterion, Member, TermsMatchingStrategy}; | ||||
|  | ||||
| pub struct RankingRules { | ||||
|     canonical_criteria: Vec<Criterion>, | ||||
|     canonical_sort: Option<Vec<AscDesc>>, | ||||
|     canonicalization_actions: Vec<CanonicalizationAction>, | ||||
|     source_criteria: Vec<Criterion>, | ||||
|     source_sort: Option<Vec<AscDesc>>, | ||||
| } | ||||
|  | ||||
| pub enum CanonicalizationAction { | ||||
|     PrependedWords { | ||||
|         prepended_index: RankingRuleSource, | ||||
|     }, | ||||
|     RemovedDuplicate { | ||||
|         earlier_occurrence: RankingRuleSource, | ||||
|         removed_occurrence: RankingRuleSource, | ||||
|     }, | ||||
|     RemovedWords { | ||||
|         reason: RemoveWords, | ||||
|         removed_occurrence: RankingRuleSource, | ||||
|     }, | ||||
|     RemovedPlaceholder { | ||||
|         removed_occurrence: RankingRuleSource, | ||||
|     }, | ||||
|     TruncatedVector { | ||||
|         vector_rule: RankingRuleSource, | ||||
|         truncated_from: RankingRuleSource, | ||||
|     }, | ||||
|     RemovedVector { | ||||
|         vector_rule: RankingRuleSource, | ||||
|         removed_occurrence: RankingRuleSource, | ||||
|     }, | ||||
|     RemovedSort { | ||||
|         removed_occurrence: RankingRuleSource, | ||||
|     }, | ||||
| } | ||||
|  | ||||
/// Why a `words` ranking rule was removed during canonicalization.
pub enum RemoveWords {
    /// The rule had been prepended earlier by canonicalization itself.
    WasPrepended,
    /// The query uses the `all` terms matching strategy.
    MatchingStrategyAll,
}

impl std::fmt::Display for RemoveWords {
    /// Writes the reason as a sentence fragment meant to follow "because".
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            RemoveWords::WasPrepended => "it was previously prepended",
            // The search parameter is `matchingStrategy`; there is no `matchingWords`.
            RemoveWords::MatchingStrategyAll => "`query.matchingStrategy` is set to `all`",
        })
    }
}
|  | ||||
/// Selects which canonicalization routine is applied to the ranking rules.
pub enum CanonicalizationKind {
    /// Canonicalize with the placeholder routine.
    Placeholder,
    /// Canonicalize with the keyword routine.
    Keyword,
    /// Canonicalize with the vector routine.
    Vector,
}
|  | ||||
| pub struct CompatibilityError { | ||||
|     previous: RankingRule, | ||||
|     current: RankingRule, | ||||
| } | ||||
| impl CompatibilityError { | ||||
|     pub(crate) fn to_response_error( | ||||
|         &self, | ||||
|         ranking_rules: &RankingRules, | ||||
|         previous_ranking_rules: &RankingRules, | ||||
|         query_index: usize, | ||||
|         previous_query_index: usize, | ||||
|         index_uid: &str, | ||||
|         previous_index_uid: &str, | ||||
|     ) -> meilisearch_types::error::ResponseError { | ||||
|         let rule = self.current.as_string( | ||||
|             &ranking_rules.canonical_criteria, | ||||
|             &ranking_rules.canonical_sort, | ||||
|             query_index, | ||||
|             index_uid, | ||||
|         ); | ||||
|         let previous_rule = self.previous.as_string( | ||||
|             &previous_ranking_rules.canonical_criteria, | ||||
|             &previous_ranking_rules.canonical_sort, | ||||
|             previous_query_index, | ||||
|             previous_index_uid, | ||||
|         ); | ||||
|  | ||||
|         let canonicalization_actions = ranking_rules.canonicalization_notes(); | ||||
|         let previous_canonicalization_actions = previous_ranking_rules.canonicalization_notes(); | ||||
|  | ||||
|         let mut msg = String::new(); | ||||
|         let reason = self.reason(); | ||||
|         let _ = writeln!( | ||||
|             &mut msg, | ||||
|             "The results of queries #{previous_query_index} and #{query_index} are incompatible: " | ||||
|         ); | ||||
|         let _ = writeln!(&mut msg, "  1. {previous_rule}"); | ||||
|         let _ = writeln!(&mut msg, "  2. {rule}"); | ||||
|         let _ = writeln!(&mut msg, "  - {reason}"); | ||||
|  | ||||
|         if !previous_canonicalization_actions.is_empty() { | ||||
|             let _ = write!(&mut msg, "  - note: The ranking rules of query #{previous_query_index} were modified during canonicalization:\n{previous_canonicalization_actions}"); | ||||
|         } | ||||
|  | ||||
|         if !canonicalization_actions.is_empty() { | ||||
|             let _ = write!(&mut msg, "  - note: The ranking rules of query #{query_index} were modified during canonicalization:\n{canonicalization_actions}"); | ||||
|         } | ||||
|  | ||||
|         ResponseError::from_msg(msg, Code::InvalidMultiSearchQueryRankingRules) | ||||
|     } | ||||
|     pub fn reason(&self) -> &'static str { | ||||
|         match (self.previous.kind, self.current.kind) { | ||||
|             (RankingRuleKind::Relevancy, RankingRuleKind::AscendingSort) | ||||
|             | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingSort) | ||||
|             | (RankingRuleKind::AscendingSort, RankingRuleKind::Relevancy) | ||||
|             | (RankingRuleKind::DescendingSort, RankingRuleKind::Relevancy) => { | ||||
|                 "cannot compare a relevancy rule with a sort rule" | ||||
|             } | ||||
|  | ||||
|             (RankingRuleKind::Relevancy, RankingRuleKind::AscendingGeoSort) | ||||
|             | (RankingRuleKind::Relevancy, RankingRuleKind::DescendingGeoSort) | ||||
|             | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::Relevancy) | ||||
|             | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::Relevancy) => { | ||||
|                 "cannot compare a relevancy rule with a geosort rule" | ||||
|             } | ||||
|  | ||||
|             (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingSort) | ||||
|             | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingSort) => { | ||||
|                 "cannot compare two sort rules in opposite directions" | ||||
|             } | ||||
|  | ||||
|             (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingGeoSort) | ||||
|             | (RankingRuleKind::AscendingSort, RankingRuleKind::DescendingGeoSort) | ||||
|             | (RankingRuleKind::DescendingSort, RankingRuleKind::AscendingGeoSort) | ||||
|             | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingGeoSort) | ||||
|             | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingSort) | ||||
|             | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingSort) | ||||
|             | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingSort) | ||||
|             | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingSort) => { | ||||
|                 "cannot compare a sort rule with a geosort rule" | ||||
|             } | ||||
|  | ||||
|             (RankingRuleKind::AscendingGeoSort, RankingRuleKind::DescendingGeoSort) | ||||
|             | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::AscendingGeoSort) => { | ||||
|                 "cannot compare two geosort rules in opposite directions" | ||||
|             } | ||||
|             (RankingRuleKind::Relevancy, RankingRuleKind::Relevancy) | ||||
|             | (RankingRuleKind::AscendingSort, RankingRuleKind::AscendingSort) | ||||
|             | (RankingRuleKind::DescendingSort, RankingRuleKind::DescendingSort) | ||||
|             | (RankingRuleKind::AscendingGeoSort, RankingRuleKind::AscendingGeoSort) | ||||
|             | (RankingRuleKind::DescendingGeoSort, RankingRuleKind::DescendingGeoSort) => { | ||||
|                 "internal error, comparison should be possible" | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl RankingRules { | ||||
|     pub fn new( | ||||
|         criteria: Vec<Criterion>, | ||||
|         sort: Option<Vec<AscDesc>>, | ||||
|         terms_matching_strategy: TermsMatchingStrategy, | ||||
|         canonicalization_kind: CanonicalizationKind, | ||||
|     ) -> Self { | ||||
|         let (canonical_criteria, canonical_sort, canonicalization_actions) = | ||||
|             Self::canonicalize(&criteria, &sort, terms_matching_strategy, canonicalization_kind); | ||||
|         Self { | ||||
|             canonical_criteria, | ||||
|             canonical_sort, | ||||
|             canonicalization_actions, | ||||
|             source_criteria: criteria, | ||||
|             source_sort: sort, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn canonicalize( | ||||
|         criteria: &[Criterion], | ||||
|         sort: &Option<Vec<AscDesc>>, | ||||
|         terms_matching_strategy: TermsMatchingStrategy, | ||||
|         canonicalization_kind: CanonicalizationKind, | ||||
|     ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) { | ||||
|         match canonicalization_kind { | ||||
|             CanonicalizationKind::Placeholder => Self::canonicalize_placeholder(criteria, sort), | ||||
|             CanonicalizationKind::Keyword => { | ||||
|                 Self::canonicalize_keyword(criteria, sort, terms_matching_strategy) | ||||
|             } | ||||
|             CanonicalizationKind::Vector => Self::canonicalize_vector(criteria, sort), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     fn canonicalize_placeholder( | ||||
|         criteria: &[Criterion], | ||||
|         sort_query: &Option<Vec<AscDesc>>, | ||||
|     ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) { | ||||
|         let mut sort = None; | ||||
|  | ||||
|         let mut sorted_fields = HashMap::new(); | ||||
|         let mut canonicalization_actions = Vec::new(); | ||||
|         let mut canonical_criteria = Vec::new(); | ||||
|         let mut canonical_sort = None; | ||||
|  | ||||
|         for (criterion_index, criterion) in criteria.iter().enumerate() { | ||||
|             match criterion.clone() { | ||||
|                 Criterion::Words | ||||
|                 | Criterion::Typo | ||||
|                 | Criterion::Proximity | ||||
|                 | Criterion::Attribute | ||||
|                 | Criterion::Exactness => { | ||||
|                     canonicalization_actions.push(CanonicalizationAction::RemovedPlaceholder { | ||||
|                         removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                     }) | ||||
|                 } | ||||
|  | ||||
|                 Criterion::Sort => { | ||||
|                     if let Some(previous_index) = sort { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { | ||||
|                             earlier_occurrence: RankingRuleSource::Criterion(previous_index), | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }); | ||||
|                     } else if let Some(sort_query) = sort_query { | ||||
|                         sort = Some(criterion_index); | ||||
|                         canonical_criteria.push(criterion.clone()); | ||||
|                         canonical_sort = Some(canonicalize_sort( | ||||
|                             &mut sorted_fields, | ||||
|                             sort_query.as_slice(), | ||||
|                             criterion_index, | ||||
|                             &mut canonicalization_actions, | ||||
|                         )); | ||||
|                     } else { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedSort { | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }) | ||||
|                     } | ||||
|                 } | ||||
|                 Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { | ||||
|                     std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions | ||||
|                         .push(CanonicalizationAction::RemovedDuplicate { | ||||
|                             earlier_occurrence: *entry.get(), | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }), | ||||
|                     std::collections::hash_map::Entry::Vacant(entry) => { | ||||
|                         entry.insert(RankingRuleSource::Criterion(criterion_index)); | ||||
|                         canonical_criteria.push(criterion.clone()) | ||||
|                     } | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         (canonical_criteria, canonical_sort, canonicalization_actions) | ||||
|     } | ||||
|  | ||||
|     fn canonicalize_vector( | ||||
|         criteria: &[Criterion], | ||||
|         sort_query: &Option<Vec<AscDesc>>, | ||||
|     ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) { | ||||
|         let mut sort = None; | ||||
|  | ||||
|         let mut sorted_fields = HashMap::new(); | ||||
|         let mut canonicalization_actions = Vec::new(); | ||||
|         let mut canonical_criteria = Vec::new(); | ||||
|         let mut canonical_sort = None; | ||||
|  | ||||
|         let mut vector = None; | ||||
|  | ||||
|         'criteria: for (criterion_index, criterion) in criteria.iter().enumerate() { | ||||
|             match criterion.clone() { | ||||
|                 Criterion::Words | ||||
|                 | Criterion::Typo | ||||
|                 | Criterion::Proximity | ||||
|                 | Criterion::Attribute | ||||
|                 | Criterion::Exactness => match vector { | ||||
|                     Some(previous_occurrence) => { | ||||
|                         if sorted_fields.is_empty() { | ||||
|                             canonicalization_actions.push(CanonicalizationAction::RemovedVector { | ||||
|                                 vector_rule: RankingRuleSource::Criterion(previous_occurrence), | ||||
|                                 removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                             }); | ||||
|                         } else { | ||||
|                             canonicalization_actions.push( | ||||
|                                 CanonicalizationAction::TruncatedVector { | ||||
|                                     vector_rule: RankingRuleSource::Criterion(previous_occurrence), | ||||
|                                     truncated_from: RankingRuleSource::Criterion(criterion_index), | ||||
|                                 }, | ||||
|                             ); | ||||
|                             break 'criteria; | ||||
|                         } | ||||
|                     } | ||||
|                     None => { | ||||
|                         canonical_criteria.push(criterion.clone()); | ||||
|                         vector = Some(criterion_index); | ||||
|                     } | ||||
|                 }, | ||||
|  | ||||
|                 Criterion::Sort => { | ||||
|                     if let Some(previous_index) = sort { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { | ||||
|                             earlier_occurrence: RankingRuleSource::Criterion(previous_index), | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }); | ||||
|                     } else if let Some(sort_query) = sort_query { | ||||
|                         sort = Some(criterion_index); | ||||
|                         canonical_criteria.push(criterion.clone()); | ||||
|                         canonical_sort = Some(canonicalize_sort( | ||||
|                             &mut sorted_fields, | ||||
|                             sort_query.as_slice(), | ||||
|                             criterion_index, | ||||
|                             &mut canonicalization_actions, | ||||
|                         )); | ||||
|                     } else { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedSort { | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }) | ||||
|                     } | ||||
|                 } | ||||
|                 Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s) { | ||||
|                     std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions | ||||
|                         .push(CanonicalizationAction::RemovedDuplicate { | ||||
|                             earlier_occurrence: *entry.get(), | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }), | ||||
|                     std::collections::hash_map::Entry::Vacant(entry) => { | ||||
|                         entry.insert(RankingRuleSource::Criterion(criterion_index)); | ||||
|                         canonical_criteria.push(criterion.clone()) | ||||
|                     } | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         (canonical_criteria, canonical_sort, canonicalization_actions) | ||||
|     } | ||||
|  | ||||
|     /// Canonicalizes the ranking rules of a keyword search. | ||||
|     /// | ||||
|     /// Walks `criteria` in order and keeps only the rules that are effectively applied: | ||||
|     /// - `words` is removed when `terms_matching_strategy` is `All`, when it already appeared | ||||
|     ///   earlier, or when it was already prepended before an earlier relevancy rule, | ||||
|     /// - `words` is prepended before the first other relevancy rule if it was not seen yet | ||||
|     ///   (done by `canonicalize_criterion`), | ||||
|     /// - duplicated relevancy rules, duplicated `sort` rules and sorts on an already sorted | ||||
|     ///   field are removed, | ||||
|     /// - `sort` is removed when `sort_query` is `None`, otherwise it is expanded with | ||||
|     ///   `canonicalize_sort`. | ||||
|     /// | ||||
|     /// Returns the canonical criteria, the canonical sort (only when a `sort` rule was kept) | ||||
|     /// and the list of actions that were taken, in the order they were taken. | ||||
|     fn canonicalize_keyword( | ||||
|         criteria: &[Criterion], | ||||
|         sort_query: &Option<Vec<AscDesc>>, | ||||
|         terms_matching_strategy: TermsMatchingStrategy, | ||||
|     ) -> (Vec<Criterion>, Option<Vec<AscDesc>>, Vec<CanonicalizationAction>) { | ||||
|         // `None`: `words` not encountered yet, | ||||
|         // `Some(None)`: `words` was prepended automatically, | ||||
|         // `Some(Some(index))`: `words` was found at `index` in `criteria`. | ||||
|         let mut words = None; | ||||
|         let mut typo = None; | ||||
|         let mut proximity = None; | ||||
|         let mut sort = None; | ||||
|         let mut attribute = None; | ||||
|         let mut exactness = None; | ||||
|         let mut sorted_fields = HashMap::new(); | ||||
|  | ||||
|         let mut canonical_criteria = Vec::new(); | ||||
|         let mut canonical_sort = None; | ||||
|  | ||||
|         let mut canonicalization_actions = Vec::new(); | ||||
|  | ||||
|         for (criterion_index, criterion) in criteria.iter().enumerate() { | ||||
|             // Match by reference and clone only the criteria that are actually kept. | ||||
|             match criterion { | ||||
|                 Criterion::Words => { | ||||
|                     if let TermsMatchingStrategy::All = terms_matching_strategy { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedWords { | ||||
|                             reason: RemoveWords::MatchingStrategyAll, | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }); | ||||
|                         continue; | ||||
|                     } | ||||
|                     match words { | ||||
|                         Some(Some(previous_index)) => { | ||||
|                             canonicalization_actions.push( | ||||
|                                 CanonicalizationAction::RemovedDuplicate { | ||||
|                                     earlier_occurrence: RankingRuleSource::Criterion( | ||||
|                                         previous_index, | ||||
|                                     ), | ||||
|                                     removed_occurrence: RankingRuleSource::Criterion( | ||||
|                                         criterion_index, | ||||
|                                     ), | ||||
|                                 }, | ||||
|                             ); | ||||
|                         } | ||||
|                         Some(None) => { | ||||
|                             // `words` is already part of the canonical criteria because it was | ||||
|                             // prepended: report the removal and do NOT push it a second time. | ||||
|                             canonicalization_actions.push(CanonicalizationAction::RemovedWords { | ||||
|                                 reason: RemoveWords::WasPrepended, | ||||
|                                 removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                             }); | ||||
|                         } | ||||
|                         None => { | ||||
|                             words = Some(Some(criterion_index)); | ||||
|                             canonical_criteria.push(criterion.clone()); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|                 Criterion::Typo => { | ||||
|                     canonicalize_criterion( | ||||
|                         criterion.clone(), | ||||
|                         criterion_index, | ||||
|                         terms_matching_strategy, | ||||
|                         &mut words, | ||||
|                         &mut canonicalization_actions, | ||||
|                         &mut canonical_criteria, | ||||
|                         &mut typo, | ||||
|                     ); | ||||
|                 } | ||||
|                 Criterion::Proximity => { | ||||
|                     canonicalize_criterion( | ||||
|                         criterion.clone(), | ||||
|                         criterion_index, | ||||
|                         terms_matching_strategy, | ||||
|                         &mut words, | ||||
|                         &mut canonicalization_actions, | ||||
|                         &mut canonical_criteria, | ||||
|                         &mut proximity, | ||||
|                     ); | ||||
|                 } | ||||
|                 Criterion::Attribute => { | ||||
|                     canonicalize_criterion( | ||||
|                         criterion.clone(), | ||||
|                         criterion_index, | ||||
|                         terms_matching_strategy, | ||||
|                         &mut words, | ||||
|                         &mut canonicalization_actions, | ||||
|                         &mut canonical_criteria, | ||||
|                         &mut attribute, | ||||
|                     ); | ||||
|                 } | ||||
|                 Criterion::Exactness => { | ||||
|                     canonicalize_criterion( | ||||
|                         criterion.clone(), | ||||
|                         criterion_index, | ||||
|                         terms_matching_strategy, | ||||
|                         &mut words, | ||||
|                         &mut canonicalization_actions, | ||||
|                         &mut canonical_criteria, | ||||
|                         &mut exactness, | ||||
|                     ); | ||||
|                 } | ||||
|  | ||||
|                 Criterion::Sort => { | ||||
|                     if let Some(previous_index) = sort { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { | ||||
|                             earlier_occurrence: RankingRuleSource::Criterion(previous_index), | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }); | ||||
|                     } else if let Some(sort_query) = sort_query { | ||||
|                         sort = Some(criterion_index); | ||||
|                         canonical_criteria.push(criterion.clone()); | ||||
|                         canonical_sort = Some(canonicalize_sort( | ||||
|                             &mut sorted_fields, | ||||
|                             sort_query.as_slice(), | ||||
|                             criterion_index, | ||||
|                             &mut canonicalization_actions, | ||||
|                         )); | ||||
|                     } else { | ||||
|                         canonicalization_actions.push(CanonicalizationAction::RemovedSort { | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }) | ||||
|                     } | ||||
|                 } | ||||
|                 Criterion::Asc(s) | Criterion::Desc(s) => match sorted_fields.entry(s.clone()) { | ||||
|                     std::collections::hash_map::Entry::Occupied(entry) => canonicalization_actions | ||||
|                         .push(CanonicalizationAction::RemovedDuplicate { | ||||
|                             earlier_occurrence: *entry.get(), | ||||
|                             removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|                         }), | ||||
|                     std::collections::hash_map::Entry::Vacant(entry) => { | ||||
|                         entry.insert(RankingRuleSource::Criterion(criterion_index)); | ||||
|                         canonical_criteria.push(criterion.clone()) | ||||
|                     } | ||||
|                 }, | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         (canonical_criteria, canonical_sort, canonicalization_actions) | ||||
|     } | ||||
|  | ||||
|     /// Checks that the ranking rules of `self` can be compared with the ones of `previous`. | ||||
|     /// | ||||
|     /// Both rule lists are coalesced (see `coalesce_iterator`) and compared pairwise on their | ||||
|     /// kind only. The comparison stops at the end of the shorter list. | ||||
|     /// | ||||
|     /// Returns a `CompatibilityError` carrying the first pair of rules whose kinds differ. | ||||
|     pub fn is_compatible_with(&self, previous: &Self) -> Result<(), CompatibilityError> { | ||||
|         let first_mismatch = self | ||||
|             .coalesce_iterator() | ||||
|             .zip(previous.coalesce_iterator()) | ||||
|             .find(|(ours, theirs)| ours.kind != theirs.kind); | ||||
|  | ||||
|         first_mismatch | ||||
|             .map_or(Ok(()), |(current, previous)| Err(CompatibilityError { current, previous })) | ||||
|     } | ||||
|  | ||||
|     /// Number of ranking rules left once the canonical rules went through `coalesce_iterator`. | ||||
|     pub fn constraint_count(&self) -> usize { | ||||
|         let coalesced_rules = self.coalesce_iterator(); | ||||
|         coalesced_rules.count() | ||||
|     } | ||||
|  | ||||
|     /// Iterates over the canonical ranking rules, merging each run of consecutive relevancy | ||||
|     /// rules into a single rule. | ||||
|     /// | ||||
|     /// A `sort` criterion is first expanded into one rule per entry of the canonical sort. | ||||
|     /// The source of a merged rule spans from the first to the last criterion of the run. | ||||
|     fn coalesce_iterator(&self) -> impl Iterator<Item = RankingRule> + '_ { | ||||
|         let canonical_sort = &self.canonical_sort; | ||||
|  | ||||
|         self.canonical_criteria | ||||
|             .iter() | ||||
|             .enumerate() | ||||
|             .flat_map(move |(criterion_index, criterion)| { | ||||
|                 RankingRule::from_criterion(criterion_index, criterion, canonical_sort) | ||||
|             }) | ||||
|             .coalesce(|previous: RankingRule, current: RankingRule| { | ||||
|                 // Only two neighbouring relevancy rules are merged; anything else is kept as is. | ||||
|                 if previous.kind != RankingRuleKind::Relevancy | ||||
|                     || current.kind != RankingRuleKind::Relevancy | ||||
|                 { | ||||
|                     return Err((previous, current)); | ||||
|                 } | ||||
|  | ||||
|                 let source = match (previous.source, current.source) { | ||||
|                     (RankingRuleSource::Criterion(begin), RankingRuleSource::Criterion(end)) | ||||
|                     | ( | ||||
|                         RankingRuleSource::CoalescedCriteria(begin, _), | ||||
|                         RankingRuleSource::Criterion(end), | ||||
|                     ) => RankingRuleSource::CoalescedCriteria(begin, end), | ||||
|                     (_, latest) => latest, | ||||
|                 }; | ||||
|  | ||||
|                 Ok(RankingRule { source, kind: current.kind }) | ||||
|             }) | ||||
|     } | ||||
|  | ||||
|     /// Renders the canonicalization actions as a numbered list, one action per line. | ||||
|     /// | ||||
|     /// Numbering starts at 1 and every line is indented with four spaces. Rule names are | ||||
|     /// resolved against the source (non-canonical) criteria and sort. | ||||
|     /// Returns an empty string when no action was taken. | ||||
|     fn canonicalization_notes(&self) -> String { | ||||
|         use CanonicalizationAction::*; | ||||
|         let mut notes = String::new(); | ||||
|         for (index, action) in self.canonicalization_actions.iter().enumerate() { | ||||
|             // notes are numbered from 1 | ||||
|             let index = index + 1; | ||||
|             // the result of `writeln!` is deliberately ignored, as was already the case | ||||
|             let _ = match action { | ||||
|                 PrependedWords { prepended_index } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Prepended rule `words` before first relevancy rule `{}` at position {}", | ||||
|                     prepended_index.rule_name(&self.source_criteria, &self.source_sort), | ||||
|                     prepended_index.rule_position() | ||||
|                 ), | ||||
|                 RemovedDuplicate { earlier_occurrence, removed_occurrence } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Removed duplicate rule `{}` at position {} as it already appears at position {}", | ||||
|                     earlier_occurrence.rule_name(&self.source_criteria, &self.source_sort), | ||||
|                     removed_occurrence.rule_position(), | ||||
|                     earlier_occurrence.rule_position(), | ||||
|                 ), | ||||
|                 RemovedWords { reason, removed_occurrence } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Removed rule `words` at position {} because {reason}", | ||||
|                     removed_occurrence.rule_position() | ||||
|                 ), | ||||
|                 RemovedPlaceholder { removed_occurrence } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Removed relevancy rule `{}` at position {} because the query is a placeholder search (`q`: \"\")", | ||||
|                     removed_occurrence.rule_name(&self.source_criteria, &self.source_sort), | ||||
|                     removed_occurrence.rule_position() | ||||
|                 ), | ||||
|                 TruncatedVector { vector_rule, truncated_from } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Truncated relevancy rule `{}` at position {} and later rules because the query is a vector search and `vector` was inserted at position {}", | ||||
|                     truncated_from.rule_name(&self.source_criteria, &self.source_sort), | ||||
|                     truncated_from.rule_position(), | ||||
|                     vector_rule.rule_position(), | ||||
|                 ), | ||||
|                 RemovedVector { vector_rule, removed_occurrence } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Removed relevancy rule `{}` at position {} because the query is a vector search and `vector` was already inserted at position {}", | ||||
|                     removed_occurrence.rule_name(&self.source_criteria, &self.source_sort), | ||||
|                     removed_occurrence.rule_position(), | ||||
|                     vector_rule.rule_position(), | ||||
|                 ), | ||||
|                 // same four-space indentation as every other note | ||||
|                 RemovedSort { removed_occurrence } => writeln!( | ||||
|                     &mut notes, | ||||
|                     "    {index}. Removed rule `sort` at position {} because `query.sort` is empty", | ||||
|                     removed_occurrence.rule_position() | ||||
|                 ), | ||||
|             }; | ||||
|         } | ||||
|         notes | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Canonicalizes the `sort` parameter of a query. | ||||
| /// | ||||
| /// Entries of `sort_query` are kept in order, except: | ||||
| /// - a sort on a field already registered in `sorted_fields` (by a previous sort entry or by | ||||
| ///   the caller), | ||||
| /// - any geo sort after the first geo sort of this `sort_query`. | ||||
| /// | ||||
| /// Each dropped entry is reported as a `RemovedDuplicate` in `canonicalization_actions`, and | ||||
| /// each kept field is registered in `sorted_fields`. `criterion_index` is the position of the | ||||
| /// `sort` ranking rule and is only used to build the sources attached to these records. | ||||
| /// | ||||
| /// Returns the entries that were kept. | ||||
| fn canonicalize_sort( | ||||
|     sorted_fields: &mut HashMap<String, RankingRuleSource>, | ||||
|     sort_query: &[AscDesc], | ||||
|     criterion_index: usize, | ||||
|     canonicalization_actions: &mut Vec<CanonicalizationAction>, | ||||
| ) -> Vec<AscDesc> { | ||||
|     // index in `sort_query` of the first geo sort, if any | ||||
|     let mut geo_sorted = None; | ||||
|     let mut canonical_sort = Vec::new(); | ||||
|     for (sort_index, asc_desc) in sort_query.iter().enumerate() { | ||||
|         let source = RankingRuleSource::Sort { criterion_index, sort_index }; | ||||
|         // Match by reference: only the field name is cloned for the lookup, and the whole | ||||
|         // entry is cloned only when it is kept. | ||||
|         let earlier_occurrence = match asc_desc { | ||||
|             AscDesc::Asc(Member::Field(s)) | AscDesc::Desc(Member::Field(s)) => { | ||||
|                 match sorted_fields.entry(s.clone()) { | ||||
|                     std::collections::hash_map::Entry::Occupied(entry) => Some(*entry.get()), | ||||
|                     std::collections::hash_map::Entry::Vacant(entry) => { | ||||
|                         entry.insert(source); | ||||
|                         None | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => match geo_sorted { | ||||
|                 Some(earlier_sort_index) => Some(RankingRuleSource::Sort { | ||||
|                     criterion_index, | ||||
|                     sort_index: earlier_sort_index, | ||||
|                 }), | ||||
|                 None => { | ||||
|                     geo_sorted = Some(sort_index); | ||||
|                     None | ||||
|                 } | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         match earlier_occurrence { | ||||
|             Some(earlier_occurrence) => { | ||||
|                 canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { | ||||
|                     earlier_occurrence, | ||||
|                     removed_occurrence: source, | ||||
|                 }) | ||||
|             } | ||||
|             None => canonical_sort.push(asc_desc.clone()), | ||||
|         } | ||||
|     } | ||||
|     canonical_sort | ||||
| } | ||||
|  | ||||
| /// Registers one occurrence of a relevancy rule tracked by `rule`. | ||||
| /// | ||||
| /// Unless the matching strategy is `All`, `words` is first prepended to `canonical_criteria` | ||||
| /// when it has not been seen yet, and `words` is set to `Some(None)` to remember that it was | ||||
| /// prepended rather than found. | ||||
| /// | ||||
| /// The criterion itself is then appended to `canonical_criteria`, unless `rule` already holds | ||||
| /// the index of an earlier occurrence, in which case a `RemovedDuplicate` is recorded instead. | ||||
| fn canonicalize_criterion( | ||||
|     criterion: Criterion, | ||||
|     criterion_index: usize, | ||||
|     terms_matching_strategy: TermsMatchingStrategy, | ||||
|     words: &mut Option<Option<usize>>, | ||||
|     canonicalization_actions: &mut Vec<CanonicalizationAction>, | ||||
|     canonical_criteria: &mut Vec<Criterion>, | ||||
|     rule: &mut Option<usize>, | ||||
| ) { | ||||
|     let strategy_is_all = matches!(terms_matching_strategy, TermsMatchingStrategy::All); | ||||
|     if !strategy_is_all && words.is_none() { | ||||
|         // inject words | ||||
|         canonicalization_actions.push(CanonicalizationAction::PrependedWords { | ||||
|             prepended_index: RankingRuleSource::Criterion(criterion_index), | ||||
|         }); | ||||
|         canonical_criteria.push(Criterion::Words); | ||||
|         *words = Some(None); | ||||
|     } | ||||
|  | ||||
|     match *rule { | ||||
|         Some(previous_index) => { | ||||
|             canonicalization_actions.push(CanonicalizationAction::RemovedDuplicate { | ||||
|                 earlier_occurrence: RankingRuleSource::Criterion(previous_index), | ||||
|                 removed_occurrence: RankingRuleSource::Criterion(criterion_index), | ||||
|             }); | ||||
|         } | ||||
|         None => { | ||||
|             *rule = Some(criterion_index); | ||||
|             canonical_criteria.push(criterion); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Category of a ranking rule. | ||||
| /// | ||||
| /// Two ranking rules are considered compatible when they have the same kind. | ||||
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] | ||||
| enum RankingRuleKind { | ||||
|     /// `words`, `typo`, `proximity`, `attribute` or `exactness`. | ||||
|     Relevancy, | ||||
|     /// Ascending sort on a regular field. | ||||
|     AscendingSort, | ||||
|     /// Descending sort on a regular field. | ||||
|     DescendingSort, | ||||
|     /// Ascending geo sort. | ||||
|     AscendingGeoSort, | ||||
|     /// Descending geo sort. | ||||
|     DescendingGeoSort, | ||||
| } | ||||
|  | ||||
| /// A ranking rule, described by where it comes from and by its kind. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub struct RankingRule { | ||||
|     /// Where the rule was declared (ranking rules setting or `sort` parameter). | ||||
|     source: RankingRuleSource, | ||||
|     /// Category of the rule. | ||||
|     kind: RankingRuleKind, | ||||
| } | ||||
|  | ||||
| /// Location a ranking rule originates from. | ||||
| #[derive(Debug, Clone, Copy)] | ||||
| pub enum RankingRuleSource { | ||||
|     /// Index of a single entry of the ranking rules. | ||||
|     Criterion(usize), | ||||
|     /// Inclusive range (first index, last index) of entries of the ranking rules. | ||||
|     CoalescedCriteria(usize, usize), | ||||
|     /// Entry `sort_index` of the `sort` parameter, `criterion_index` being the index of the | ||||
|     /// `sort` rule in the ranking rules. | ||||
|     Sort { criterion_index: usize, sort_index: usize }, | ||||
| } | ||||
|  | ||||
| impl RankingRuleSource { | ||||
|     /// Human-readable name of the rule(s) designated by this source. | ||||
|     /// | ||||
|     /// Indexes are resolved in `criteria` and `sort`. A single criterion or a sort entry that | ||||
|     /// cannot be found is named `unknown`; a range that cannot be resolved gives an empty name. | ||||
|     fn rule_name(&self, criteria: &[Criterion], sort: &Option<Vec<AscDesc>>) -> String { | ||||
|         match *self { | ||||
|             Self::Criterion(index) => match criteria.get(index) { | ||||
|                 Some(criterion) => criterion.to_string(), | ||||
|                 None => "unknown".into(), | ||||
|             }, | ||||
|             Self::CoalescedCriteria(begin, end) => criteria | ||||
|                 .get(begin..=end) | ||||
|                 .map(|coalesced| { | ||||
|                     let names: Vec<String> = | ||||
|                         coalesced.iter().map(|criterion| criterion.to_string()).collect(); | ||||
|                     names.join(", ") | ||||
|                 }) | ||||
|                 .unwrap_or_default(), | ||||
|             Self::Sort { sort_index, .. } => sort | ||||
|                 .as_deref() | ||||
|                 .and_then(|entries| entries.get(sort_index)) | ||||
|                 .map(|entry| match entry { | ||||
|                     AscDesc::Asc(Member::Field(field_name)) => format!("{field_name}:asc"), | ||||
|                     AscDesc::Desc(Member::Field(field_name)) => format!("{field_name}:desc"), | ||||
|                     AscDesc::Asc(Member::Geo(_)) => "_geo(..):asc".to_string(), | ||||
|                     AscDesc::Desc(Member::Geo(_)) => "_geo(..):desc".to_string(), | ||||
|                 }) | ||||
|                 .unwrap_or_else(|| "unknown".into()), | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Human-readable position of this source in the ranking rules or in `query.sort`. | ||||
|     fn rule_position(&self) -> String { | ||||
|         match *self { | ||||
|             Self::Criterion(criterion_index) => format!("#{criterion_index} in ranking rules"), | ||||
|             Self::CoalescedCriteria(begin, end) => { | ||||
|                 format!("#{begin} to #{end} in ranking rules") | ||||
|             } | ||||
|             Self::Sort { criterion_index, sort_index } => { | ||||
|                 format!( | ||||
|                     "#{sort_index} in `query.sort` (as `sort` is #{criterion_index} in ranking rules)" | ||||
|                 ) | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl RankingRule { | ||||
|     /// Converts a criterion into the ranking rule(s) it stands for. | ||||
|     /// | ||||
|     /// Every criterion gives exactly one rule, except `sort` which gives one rule per entry | ||||
|     /// of `sort` (none when `sort` is `None`). | ||||
|     fn from_criterion<'a>( | ||||
|         criterion_index: usize, | ||||
|         criterion: &'a Criterion, | ||||
|         sort: &'a Option<Vec<AscDesc>>, | ||||
|     ) -> impl Iterator<Item = Self> + 'a { | ||||
|         let kind = match criterion { | ||||
|             Criterion::Sort => { | ||||
|                 let expanded = sort.iter().flatten().enumerate().map( | ||||
|                     move |(rule_index, asc_desc)| { | ||||
|                         Self::from_asc_desc(asc_desc, criterion_index, rule_index) | ||||
|                     }, | ||||
|                 ); | ||||
|                 return either::Right(expanded); | ||||
|             } | ||||
|             Criterion::Words | ||||
|             | Criterion::Typo | ||||
|             | Criterion::Proximity | ||||
|             | Criterion::Attribute | ||||
|             | Criterion::Exactness => RankingRuleKind::Relevancy, | ||||
|             // a criterion on the `_geo` field is a geo sort | ||||
|             Criterion::Asc(s) => { | ||||
|                 if s == "_geo" { | ||||
|                     RankingRuleKind::AscendingGeoSort | ||||
|                 } else { | ||||
|                     RankingRuleKind::AscendingSort | ||||
|                 } | ||||
|             } | ||||
|             Criterion::Desc(s) => { | ||||
|                 if s == "_geo" { | ||||
|                     RankingRuleKind::DescendingGeoSort | ||||
|                 } else { | ||||
|                     RankingRuleKind::DescendingSort | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let source = RankingRuleSource::Criterion(criterion_index); | ||||
|         either::Left(std::iter::once(Self { source, kind })) | ||||
|     } | ||||
|  | ||||
|     /// Builds the rule of one entry of the `sort` parameter. | ||||
|     /// | ||||
|     /// `sort_index` is stored as the `criterion_index` of the source (the position of the | ||||
|     /// `sort` rule in the ranking rules), and `rule_index_in_sort` as its `sort_index`. | ||||
|     fn from_asc_desc(asc_desc: &AscDesc, sort_index: usize, rule_index_in_sort: usize) -> Self { | ||||
|         let kind = match asc_desc { | ||||
|             AscDesc::Asc(member) => match member { | ||||
|                 Member::Field(_) => RankingRuleKind::AscendingSort, | ||||
|                 Member::Geo(_) => RankingRuleKind::AscendingGeoSort, | ||||
|             }, | ||||
|             AscDesc::Desc(member) => match member { | ||||
|                 Member::Field(_) => RankingRuleKind::DescendingSort, | ||||
|                 Member::Geo(_) => RankingRuleKind::DescendingGeoSort, | ||||
|             }, | ||||
|         }; | ||||
|         let source = RankingRuleSource::Sort { | ||||
|             criterion_index: sort_index, | ||||
|             sort_index: rule_index_in_sort, | ||||
|         }; | ||||
|         Self { source, kind } | ||||
|     } | ||||
|  | ||||
|     /// Describes this rule as `<location>: <kind> rule(s) <names>`. | ||||
|     /// | ||||
|     /// The location is expressed relative to `queries[query_index]` and to the ranking rules | ||||
|     /// of the index `index_uid`. | ||||
|     fn as_string( | ||||
|         &self, | ||||
|         canonical_criteria: &[Criterion], | ||||
|         canonical_sort: &Option<Vec<AscDesc>>, | ||||
|         query_index: usize, | ||||
|         index_uid: &str, | ||||
|     ) -> String { | ||||
|         let location = match self.source { | ||||
|             RankingRuleSource::Criterion(criterion_index) => { | ||||
|                 format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{criterion_index}]`") | ||||
|             } | ||||
|             RankingRuleSource::CoalescedCriteria(begin, end) => { | ||||
|                 format!("`queries[{query_index}]`, `{index_uid}.rankingRules[{begin}..={end}]`") | ||||
|             } | ||||
|             RankingRuleSource::Sort { criterion_index, sort_index } => { | ||||
|                 format!("`queries[{query_index}].sort[{sort_index}]`, `{index_uid}.rankingRules[{criterion_index}]`") | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let kind_label = match self.kind { | ||||
|             RankingRuleKind::Relevancy => "relevancy", | ||||
|             RankingRuleKind::AscendingSort => "ascending sort", | ||||
|             RankingRuleKind::DescendingSort => "descending sort", | ||||
|             RankingRuleKind::AscendingGeoSort => "ascending geo sort", | ||||
|             RankingRuleKind::DescendingGeoSort => "descending geo sort", | ||||
|         }; | ||||
|  | ||||
|         let rule_names = self.fetch_from_source(canonical_criteria, canonical_sort); | ||||
|  | ||||
|         format!("{location}: {kind_label} {rule_names}") | ||||
|     } | ||||
|  | ||||
|     /// Names the rule(s) designated by the source of this rule, prefixed with `rule(s) `. | ||||
|     /// | ||||
|     /// Falls back to `default` when the source cannot be resolved, when a coalesced range is | ||||
|     /// empty, or when the source is a sort that is not a sort on a field. | ||||
|     fn fetch_from_source( | ||||
|         &self, | ||||
|         canonical_criteria: &[Criterion], | ||||
|         canonical_sort: &Option<Vec<AscDesc>>, | ||||
|     ) -> String { | ||||
|         let resolved: Option<String> = match self.source { | ||||
|             RankingRuleSource::Criterion(index) => { | ||||
|                 canonical_criteria.get(index).map(|criterion| criterion.to_string()) | ||||
|             } | ||||
|             RankingRuleSource::CoalescedCriteria(begin, end) => canonical_criteria | ||||
|                 .get(begin..=end) | ||||
|                 .filter(|coalesced| !coalesced.is_empty()) | ||||
|                 .map(|coalesced| { | ||||
|                     let names: Vec<String> = | ||||
|                         coalesced.iter().map(|criterion| criterion.to_string()).collect(); | ||||
|                     names.join(", ") | ||||
|                 }), | ||||
|             RankingRuleSource::Sort { criterion_index: _, sort_index } => { | ||||
|                 match canonical_sort.as_deref().and_then(|entries| entries.get(sort_index)) { | ||||
|                     Some(AscDesc::Asc(Member::Field(s))) | ||||
|                     | Some(AscDesc::Desc(Member::Field(s))) => Some(format!("on field `{s}`")), | ||||
|                     _ => None, | ||||
|                 } | ||||
|             } | ||||
|         }; | ||||
|  | ||||
|         let rule_name = resolved.unwrap_or_else(|| "default".into()); | ||||
|         format!("rule(s) {rule_name}") | ||||
|     } | ||||
| } | ||||
| @@ -310,6 +310,23 @@ macro_rules! compute_authorized_single_search { | ||||
|                     tenant_token, | ||||
|                     key_content | ||||
|                 ); | ||||
|  | ||||
|                 // federated | ||||
|                 let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales", "filter": $filter}]})).await; | ||||
|                 assert_eq!( | ||||
|                     200, code, | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, tenant_token, key_content | ||||
|                 ); | ||||
|                 assert_eq!( | ||||
|                     // same count as the search is federated over a single query | ||||
|                     $expected_count, | ||||
|                     response["hits"].as_array().unwrap().len(), | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, | ||||
|                     tenant_token, | ||||
|                     key_content | ||||
|                 ); | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| @@ -375,6 +392,25 @@ macro_rules! compute_authorized_multiple_search { | ||||
|                     tenant_token, | ||||
|                     key_content | ||||
|                 ); | ||||
|  | ||||
|                 let (response, code) = server.multi_search(json!({"federation": {}, "queries" : [ | ||||
|                     {"indexUid": "sales", "filter": $filter1}, | ||||
|                     {"indexUid": "products", "filter": $filter2}, | ||||
|                 ]})).await; | ||||
|                 assert_eq!( | ||||
|                     code, 200, | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, tenant_token, key_content | ||||
|                 ); | ||||
|                 assert_eq!( | ||||
|                     response["hits"].as_array().unwrap().len(), | ||||
|                     // sum of counts as the search is federated across two queries in different indexes | ||||
|                     $expected_count1 + $expected_count2, | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, | ||||
|                     tenant_token, | ||||
|                     key_content | ||||
|                 ); | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| @@ -433,6 +469,24 @@ macro_rules! compute_forbidden_single_search { | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, tenant_token, key_content | ||||
|                 ); | ||||
|  | ||||
|                 let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [{"indexUid": "sales"}]})).await; | ||||
|                 if failed_query_index.is_none() && !response["message"].is_null() { | ||||
|                     response["message"] = serde_json::json!(null); | ||||
|                 } | ||||
|                 assert_eq!( | ||||
|                     response, | ||||
|                     invalid_response(failed_query_index), | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, | ||||
|                     tenant_token, | ||||
|                     key_content | ||||
|                 ); | ||||
|                 assert_eq!( | ||||
|                     code, 403, | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, tenant_token, key_content | ||||
|                 ); | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
| @@ -494,6 +548,27 @@ macro_rules! compute_forbidden_multiple_search { | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, tenant_token, key_content | ||||
|                 ); | ||||
|  | ||||
|                 let (mut response, code) = server.multi_search(json!({"federation": {}, "queries" : [ | ||||
|                     {"indexUid": "sales"}, | ||||
|                     {"indexUid": "products"}, | ||||
|                 ]})).await; | ||||
|                 if failed_query_index.is_none() && !response["message"].is_null() { | ||||
|                     response["message"] = serde_json::json!(null); | ||||
|                 } | ||||
|                 assert_eq!( | ||||
|                     response, | ||||
|                     invalid_response(failed_query_index), | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, | ||||
|                     tenant_token, | ||||
|                     key_content | ||||
|                 ); | ||||
|                 assert_eq!( | ||||
|                     code, 403, | ||||
|                     "{} using tenant_token: {:?} generated with parent_key: {:?}", | ||||
|                     response, tenant_token, key_content | ||||
|                 ); | ||||
|             } | ||||
|         } | ||||
|     }; | ||||
|   | ||||
| @@ -132,6 +132,79 @@ static NESTED_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | ||||
|     ]) | ||||
| }); | ||||
|  | ||||
| // Five fruit-themed documents with a `name` and an `id`; three of them | ||||
| // (the "Exclusive sale" ones) additionally carry `"BOOST": true`. | ||||
| static FRUITS_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | ||||
|     json!([ | ||||
|         { | ||||
|             "name": "Exclusive sale: green apple", | ||||
|             "id": "green-apple-boosted", | ||||
|             "BOOST": true | ||||
|         }, | ||||
|         { | ||||
|             "name": "Pear", | ||||
|             "id": "pear", | ||||
|         }, | ||||
|         { | ||||
|             "name": "Red apple gala", | ||||
|             "id": "red-apple-gala", | ||||
|         }, | ||||
|         { | ||||
|             "name": "Exclusive sale: Red Tomato", | ||||
|             "id": "red-tomatoes-boosted", | ||||
|             "BOOST": true | ||||
|         }, | ||||
|         { | ||||
|             "name": "Exclusive sale: Red delicious apple", | ||||
|             "id": "red-delicious-boosted", | ||||
|             "BOOST": true, | ||||
|         } | ||||
|     ]) | ||||
| }); | ||||
|  | ||||
| /// Test fixture: a JSON array of four documents (`id` "A" through "D"), each | ||||
| /// with a text `description` and a `_vectors` object. | ||||
| /// | ||||
| /// `_vectors` holds two named vectors per document: | ||||
| /// - `animal`: three components, labelled [canine, feline, young] below; | ||||
| /// - `sentiment`: two components, labelled [negative/positive, energy] below. | ||||
| /// | ||||
| /// NOTE(review): the names `animal` and `sentiment` presumably have to match | ||||
| /// embedder names configured on the index by the tests -- confirm. | ||||
| static VECTOR_DOCUMENTS: Lazy<Value> = Lazy::new(|| { | ||||
|     json!([ | ||||
|       { | ||||
|         "id": "A", | ||||
|         "description": "the dog barks at the cat", | ||||
|         "_vectors": { | ||||
|           // dimensions [canine, feline, young] | ||||
|           "animal": [0.9, 0.8, 0.05], | ||||
|           // dimensions [negative/positive, energy] | ||||
|           "sentiment": [-0.1, 0.55] | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "id": "B", | ||||
|         "description": "the kitten scratched the beagle", | ||||
|         "_vectors": { | ||||
|           // dimensions [canine, feline, young] | ||||
|           "animal": [0.8, 0.9, 0.5], | ||||
|           // dimensions [negative/positive, energy] | ||||
|           "sentiment": [-0.2, 0.65] | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "id": "C", | ||||
|         "description": "the dog had to stay alone today", | ||||
|         "_vectors": { | ||||
|           // dimensions [canine, feline, young] | ||||
|           "animal": [0.85, 0.02, 0.1], | ||||
|           // dimensions [negative/positive, energy] | ||||
|           "sentiment": [-1.0, 0.1] | ||||
|         } | ||||
|       }, | ||||
|       { | ||||
|         "id": "D", | ||||
|         "description": "the little boy pets the puppy", | ||||
|         "_vectors": { | ||||
|           // dimensions [canine, feline, young] | ||||
|           "animal": [0.8, 0.09, 0.8], | ||||
|           // dimensions [negative/positive, energy] | ||||
|           "sentiment": [0.8, 0.3] | ||||
|         } | ||||
|       }, | ||||
|     ]) | ||||
| }); | ||||
|  | ||||
| #[actix_rt::test] | ||||
| async fn simple_placeholder_search() { | ||||
|     let server = Server::new().await; | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -425,9 +425,6 @@ pub struct Sort { | ||||
|  | ||||
| impl PartialOrd for Sort { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         if self.field_name != other.field_name { | ||||
|             return None; | ||||
|         } | ||||
|         if self.ascending != other.ascending { | ||||
|             return None; | ||||
|         } | ||||
| @@ -466,9 +463,6 @@ pub struct GeoSort { | ||||
|  | ||||
| impl PartialOrd for GeoSort { | ||||
|     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||
|         if self.target_point != other.target_point { | ||||
|             return None; | ||||
|         } | ||||
|         if self.ascending != other.ascending { | ||||
|             return None; | ||||
|         } | ||||
|   | ||||
| @@ -46,7 +46,7 @@ impl<'m> MatcherBuilder<'m> { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn build<'t>(&'m self, text: &'t str) -> Matcher<'t, 'm> { | ||||
|     pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> { | ||||
|         let crop_marker = match &self.crop_marker { | ||||
|             Some(marker) => marker.as_str(), | ||||
|             None => DEFAULT_CROP_MARKER, | ||||
| @@ -105,19 +105,19 @@ pub struct MatchBounds { | ||||
|     pub length: usize, | ||||
| } | ||||
|  | ||||
| /// Structure used to analize a string, compute words that match, | ||||
| /// Structure used to analyze a string, compute words that match, | ||||
| /// and format the source string, returning a highlighted and cropped sub-string. | ||||
| pub struct Matcher<'t, 'm> { | ||||
| pub struct Matcher<'t, 'tokenizer, 'b> { | ||||
|     text: &'t str, | ||||
|     matching_words: &'m MatchingWords, | ||||
|     tokenizer: &'m Tokenizer<'m>, | ||||
|     crop_marker: &'m str, | ||||
|     highlight_prefix: &'m str, | ||||
|     highlight_suffix: &'m str, | ||||
|     matching_words: &'b MatchingWords, | ||||
|     tokenizer: &'b Tokenizer<'tokenizer>, | ||||
|     crop_marker: &'b str, | ||||
|     highlight_prefix: &'b str, | ||||
|     highlight_suffix: &'b str, | ||||
|     matches: Option<(Vec<Token<'t>>, Vec<Match>)>, | ||||
| } | ||||
|  | ||||
| impl<'t> Matcher<'t, '_> { | ||||
| impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> { | ||||
|     /// Iterates over tokens and save any of them that matches the query. | ||||
|     fn compute_matches(&mut self) -> &mut Self { | ||||
|         /// some words are counted as matches only if they are close together and in the good order, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user