mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 13:06:27 +00:00 
			
		
		
		
	Merge #4929
4929: Add facets support to federated r=Kerollmops a=dureuill

# Pull Request

## Related issue
- Fixes #4932 (sprint issue)
- Fixes #4913 (user-opened issue)

## What does this PR do?

See [public usage](https://meilisearch.notion.site/v1-11-Federated-search-59b30e03383c40729d7541a3dffb0069)

> [!CAUTION]
> This PR introduces a 🚨**breaking change**🚨: `queries.facets` when `federation` is present and non-`null` is now **an error**

### Implementation standpoint:

- Facet distribution: fix issue where truncated facet distribution would have a wrong order
- facet distribution: implement Display for OrderBy

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
		| @@ -238,8 +238,14 @@ InvalidIndexLimit                     , InvalidRequest       , BAD_REQUEST ; | |||||||
| InvalidIndexOffset                    , InvalidRequest       , BAD_REQUEST ; | InvalidIndexOffset                    , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidIndexPrimaryKey                , InvalidRequest       , BAD_REQUEST ; | InvalidIndexPrimaryKey                , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidIndexUid                       , InvalidRequest       , BAD_REQUEST ; | InvalidIndexUid                       , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchFacets              , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchFacetsByIndex       , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchFacetOrder          , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchFederated           , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchFederated           , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchFederationOptions   , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchFederationOptions   , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchMaxValuesPerFacet   , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchMergeFacets         , InvalidRequest       , BAD_REQUEST ; | ||||||
|  | InvalidMultiSearchQueryFacets         , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchQueryPagination     , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchQueryRankingRules   , InvalidRequest       , BAD_REQUEST ; | ||||||
| InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ; | InvalidMultiSearchWeight              , InvalidRequest       , BAD_REQUEST ; | ||||||
|   | |||||||
| @@ -1,3 +1,4 @@ | |||||||
|  | use std::borrow::Borrow; | ||||||
| use std::error::Error; | use std::error::Error; | ||||||
| use std::fmt; | use std::fmt; | ||||||
| use std::str::FromStr; | use std::str::FromStr; | ||||||
| @@ -8,7 +9,7 @@ use crate::error::{Code, ErrorCode}; | |||||||
|  |  | ||||||
| /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 | /// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400 | ||||||
| /// bytes long | /// bytes long | ||||||
| #[derive(Debug, Clone, PartialEq, Eq, Deserr)] | #[derive(Debug, Clone, PartialEq, Eq, Deserr, PartialOrd, Ord)] | ||||||
| #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] | #[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)] | ||||||
| pub struct IndexUid(String); | pub struct IndexUid(String); | ||||||
|  |  | ||||||
| @@ -70,6 +71,12 @@ impl From<IndexUid> for String { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl Borrow<String> for IndexUid { | ||||||
|  |     fn borrow(&self) -> &String { | ||||||
|  |         &self.0 | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| #[derive(Debug)] | #[derive(Debug)] | ||||||
| pub struct IndexUidFormatError { | pub struct IndexUidFormatError { | ||||||
|     pub invalid_uid: String, |     pub invalid_uid: String, | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ use byte_unit::{Byte, UnitType}; | |||||||
| use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; | use meilisearch_types::document_formats::{DocumentFormatError, PayloadType}; | ||||||
| use meilisearch_types::error::{Code, ErrorCode, ResponseError}; | use meilisearch_types::error::{Code, ErrorCode, ResponseError}; | ||||||
| use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; | use meilisearch_types::index_uid::{IndexUid, IndexUidFormatError}; | ||||||
|  | use meilisearch_types::milli::OrderBy; | ||||||
| use serde_json::Value; | use serde_json::Value; | ||||||
| use tokio::task::JoinError; | use tokio::task::JoinError; | ||||||
|  |  | ||||||
| @@ -27,10 +28,20 @@ pub enum MeilisearchHttpError { | |||||||
|     EmptyFilter, |     EmptyFilter, | ||||||
|     #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] |     #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] | ||||||
|     InvalidExpression(&'static [&'static str], Value), |     InvalidExpression(&'static [&'static str], Value), | ||||||
|     #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] |     #[error("Using `federationOptions` is not allowed in a non-federated search.\n - Hint: remove `federationOptions` from query #{0} or add `federation` to the request.")] | ||||||
|     FederationOptionsInNonFederatedRequest(usize), |     FederationOptionsInNonFederatedRequest(usize), | ||||||
|     #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] |     #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n - Hint: remove `{1}` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.limit` and `federation.offset` for pagination in federated search")] | ||||||
|     PaginationInFederatedQuery(usize, &'static str), |     PaginationInFederatedQuery(usize, &'static str), | ||||||
|  |     #[error("Inside `.queries[{0}]`: Using facet options is not allowed in federated queries.\n - Hint: remove `facets` from query #{0} or remove `federation` from the request\n - Hint: pass `federation.facetsByIndex.{1}: {2:?}` for facets in federated search")] | ||||||
|  |     FacetsInFederatedQuery(usize, String, Vec<String>), | ||||||
|  |     #[error("Inconsistent order for values in facet `{facet}`: index `{previous_uid}` orders {previous_facet_order}, but index `{current_uid}` orders {index_facet_order}.\n - Hint: Remove `federation.mergeFacets` or change `faceting.sortFacetValuesBy` to be consistent in settings.")] | ||||||
|  |     InconsistentFacetOrder { | ||||||
|  |         facet: String, | ||||||
|  |         previous_facet_order: OrderBy, | ||||||
|  |         previous_uid: String, | ||||||
|  |         index_facet_order: OrderBy, | ||||||
|  |         current_uid: String, | ||||||
|  |     }, | ||||||
|     #[error("A {0} payload is missing.")] |     #[error("A {0} payload is missing.")] | ||||||
|     MissingPayload(PayloadType), |     MissingPayload(PayloadType), | ||||||
|     #[error("Too many search requests running at the same time: {0}. Retry after 10s.")] |     #[error("Too many search requests running at the same time: {0}. Retry after 10s.")] | ||||||
| @@ -96,6 +107,10 @@ impl ErrorCode for MeilisearchHttpError { | |||||||
|             MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { |             MeilisearchHttpError::PaginationInFederatedQuery(_, _) => { | ||||||
|                 Code::InvalidMultiSearchQueryPagination |                 Code::InvalidMultiSearchQueryPagination | ||||||
|             } |             } | ||||||
|  |             MeilisearchHttpError::FacetsInFederatedQuery(..) => Code::InvalidMultiSearchQueryFacets, | ||||||
|  |             MeilisearchHttpError::InconsistentFacetOrder { .. } => { | ||||||
|  |                 Code::InvalidMultiSearchFacetOrder | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| } | } | ||||||
|   | |||||||
| @@ -9,20 +9,24 @@ use std::vec::{IntoIter, Vec}; | |||||||
|  |  | ||||||
| use actix_http::StatusCode; | use actix_http::StatusCode; | ||||||
| use index_scheduler::{IndexScheduler, RoFeatures}; | use index_scheduler::{IndexScheduler, RoFeatures}; | ||||||
|  | use indexmap::IndexMap; | ||||||
| use meilisearch_types::deserr::DeserrJsonError; | use meilisearch_types::deserr::DeserrJsonError; | ||||||
| use meilisearch_types::error::deserr_codes::{ | use meilisearch_types::error::deserr_codes::{ | ||||||
|     InvalidMultiSearchWeight, InvalidSearchLimit, InvalidSearchOffset, |     InvalidMultiSearchFacetsByIndex, InvalidMultiSearchMaxValuesPerFacet, | ||||||
|  |     InvalidMultiSearchMergeFacets, InvalidMultiSearchWeight, InvalidSearchLimit, | ||||||
|  |     InvalidSearchOffset, | ||||||
| }; | }; | ||||||
| use meilisearch_types::error::ResponseError; | use meilisearch_types::error::ResponseError; | ||||||
|  | use meilisearch_types::index_uid::IndexUid; | ||||||
| use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; | use meilisearch_types::milli::score_details::{ScoreDetails, ScoreValue}; | ||||||
| use meilisearch_types::milli::{self, DocumentId, TimeBudget}; | use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget}; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
| use serde::Serialize; | use serde::Serialize; | ||||||
|  |  | ||||||
| use super::ranking_rules::{self, RankingRules}; | use super::ranking_rules::{self, RankingRules}; | ||||||
| use super::{ | use super::{ | ||||||
|     prepare_search, AttributesFormat, HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, |     compute_facet_distribution_stats, prepare_search, AttributesFormat, ComputedFacets, FacetStats, | ||||||
|     SearchQuery, SearchQueryWithIndex, |     HitMaker, HitsInfo, RetrieveVectors, SearchHit, SearchKind, SearchQuery, SearchQueryWithIndex, | ||||||
| }; | }; | ||||||
| use crate::error::MeilisearchHttpError; | use crate::error::MeilisearchHttpError; | ||||||
| use crate::routes::indexes::search::search_kind; | use crate::routes::indexes::search::search_kind; | ||||||
| @@ -73,6 +77,17 @@ pub struct Federation { | |||||||
|     pub limit: usize, |     pub limit: usize, | ||||||
|     #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] |     #[deserr(default = super::DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)] | ||||||
|     pub offset: usize, |     pub offset: usize, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchFacetsByIndex>)] | ||||||
|  |     pub facets_by_index: BTreeMap<IndexUid, Option<Vec<String>>>, | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMergeFacets>)] | ||||||
|  |     pub merge_facets: Option<MergeFacets>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Copy, Clone, Debug, deserr::Deserr, Default)] | ||||||
|  | #[deserr(error = DeserrJsonError<InvalidMultiSearchMergeFacets>, rename_all = camelCase, deny_unknown_fields)] | ||||||
|  | pub struct MergeFacets { | ||||||
|  |     #[deserr(default, error = DeserrJsonError<InvalidMultiSearchMaxValuesPerFacet>)] | ||||||
|  |     pub max_values_per_facet: Option<usize>, | ||||||
| } | } | ||||||
|  |  | ||||||
| #[derive(Debug, deserr::Deserr)] | #[derive(Debug, deserr::Deserr)] | ||||||
| @@ -82,7 +97,7 @@ pub struct FederatedSearch { | |||||||
|     #[deserr(default)] |     #[deserr(default)] | ||||||
|     pub federation: Option<Federation>, |     pub federation: Option<Federation>, | ||||||
| } | } | ||||||
| #[derive(Serialize, Clone, PartialEq)] | #[derive(Serialize, Clone)] | ||||||
| #[serde(rename_all = "camelCase")] | #[serde(rename_all = "camelCase")] | ||||||
| pub struct FederatedSearchResult { | pub struct FederatedSearchResult { | ||||||
|     pub hits: Vec<SearchHit>, |     pub hits: Vec<SearchHit>, | ||||||
| @@ -93,6 +108,13 @@ pub struct FederatedSearchResult { | |||||||
|     #[serde(skip_serializing_if = "Option::is_none")] |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|     pub semantic_hit_count: Option<u32>, |     pub semantic_hit_count: Option<u32>, | ||||||
|  |  | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub facet_distribution: Option<BTreeMap<String, IndexMap<String, u64>>>, | ||||||
|  |     #[serde(skip_serializing_if = "Option::is_none")] | ||||||
|  |     pub facet_stats: Option<BTreeMap<String, FacetStats>>, | ||||||
|  |     #[serde(skip_serializing_if = "FederatedFacets::is_empty")] | ||||||
|  |     pub facets_by_index: FederatedFacets, | ||||||
|  |  | ||||||
|     // These fields are only used for analytics purposes |     // These fields are only used for analytics purposes | ||||||
|     #[serde(skip)] |     #[serde(skip)] | ||||||
|     pub degraded: bool, |     pub degraded: bool, | ||||||
| @@ -109,6 +131,9 @@ impl fmt::Debug for FederatedSearchResult { | |||||||
|             semantic_hit_count, |             semantic_hit_count, | ||||||
|             degraded, |             degraded, | ||||||
|             used_negative_operator, |             used_negative_operator, | ||||||
|  |             facet_distribution, | ||||||
|  |             facet_stats, | ||||||
|  |             facets_by_index, | ||||||
|         } = self; |         } = self; | ||||||
|  |  | ||||||
|         let mut debug = f.debug_struct("SearchResult"); |         let mut debug = f.debug_struct("SearchResult"); | ||||||
| @@ -122,9 +147,18 @@ impl fmt::Debug for FederatedSearchResult { | |||||||
|         if *degraded { |         if *degraded { | ||||||
|             debug.field("degraded", degraded); |             debug.field("degraded", degraded); | ||||||
|         } |         } | ||||||
|  |         if let Some(facet_distribution) = facet_distribution { | ||||||
|  |             debug.field("facet_distribution", &facet_distribution); | ||||||
|  |         } | ||||||
|  |         if let Some(facet_stats) = facet_stats { | ||||||
|  |             debug.field("facet_stats", &facet_stats); | ||||||
|  |         } | ||||||
|         if let Some(semantic_hit_count) = semantic_hit_count { |         if let Some(semantic_hit_count) = semantic_hit_count { | ||||||
|             debug.field("semantic_hit_count", &semantic_hit_count); |             debug.field("semantic_hit_count", &semantic_hit_count); | ||||||
|         } |         } | ||||||
|  |         if !facets_by_index.is_empty() { | ||||||
|  |             debug.field("facets_by_index", &facets_by_index); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         debug.finish() |         debug.finish() | ||||||
|     } |     } | ||||||
| @@ -313,16 +347,104 @@ struct SearchHitByIndex { | |||||||
| } | } | ||||||
|  |  | ||||||
| struct SearchResultByIndex { | struct SearchResultByIndex { | ||||||
|  |     index: String, | ||||||
|     hits: Vec<SearchHitByIndex>, |     hits: Vec<SearchHitByIndex>, | ||||||
|     candidates: RoaringBitmap, |     estimated_total_hits: usize, | ||||||
|     degraded: bool, |     degraded: bool, | ||||||
|     used_negative_operator: bool, |     used_negative_operator: bool, | ||||||
|  |     facets: Option<ComputedFacets>, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Default, Serialize)] | ||||||
|  | pub struct FederatedFacets(pub BTreeMap<String, ComputedFacets>); | ||||||
|  |  | ||||||
|  | impl FederatedFacets { | ||||||
|  |     pub fn insert(&mut self, index: String, facets: Option<ComputedFacets>) { | ||||||
|  |         if let Some(facets) = facets { | ||||||
|  |             self.0.insert(index, facets); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn is_empty(&self) -> bool { | ||||||
|  |         self.0.is_empty() | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn merge( | ||||||
|  |         self, | ||||||
|  |         MergeFacets { max_values_per_facet }: MergeFacets, | ||||||
|  |         facet_order: BTreeMap<String, (String, OrderBy)>, | ||||||
|  |     ) -> Option<ComputedFacets> { | ||||||
|  |         if self.is_empty() { | ||||||
|  |             return None; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let mut distribution: BTreeMap<String, _> = Default::default(); | ||||||
|  |         let mut stats: BTreeMap<String, FacetStats> = Default::default(); | ||||||
|  |  | ||||||
|  |         for facets_by_index in self.0.into_values() { | ||||||
|  |             for (facet, index_distribution) in facets_by_index.distribution { | ||||||
|  |                 match distribution.entry(facet) { | ||||||
|  |                     std::collections::btree_map::Entry::Vacant(entry) => { | ||||||
|  |                         entry.insert(index_distribution); | ||||||
|  |                     } | ||||||
|  |                     std::collections::btree_map::Entry::Occupied(mut entry) => { | ||||||
|  |                         let distribution = entry.get_mut(); | ||||||
|  |  | ||||||
|  |                         for (value, index_count) in index_distribution { | ||||||
|  |                             distribution | ||||||
|  |                                 .entry(value) | ||||||
|  |                                 .and_modify(|count| *count += index_count) | ||||||
|  |                                 .or_insert(index_count); | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             for (facet, index_stats) in facets_by_index.stats { | ||||||
|  |                 match stats.entry(facet) { | ||||||
|  |                     std::collections::btree_map::Entry::Vacant(entry) => { | ||||||
|  |                         entry.insert(index_stats); | ||||||
|  |                     } | ||||||
|  |                     std::collections::btree_map::Entry::Occupied(mut entry) => { | ||||||
|  |                         let stats = entry.get_mut(); | ||||||
|  |  | ||||||
|  |                         stats.min = f64::min(stats.min, index_stats.min); | ||||||
|  |                         stats.max = f64::max(stats.max, index_stats.max); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // fixup order | ||||||
|  |         for (facet, values) in &mut distribution { | ||||||
|  |             let order_by = facet_order.get(facet).map(|(_, order)| *order).unwrap_or_default(); | ||||||
|  |  | ||||||
|  |             match order_by { | ||||||
|  |                 OrderBy::Lexicographic => { | ||||||
|  |                     values.sort_unstable_by(|left, _, right, _| left.cmp(right)) | ||||||
|  |                 } | ||||||
|  |                 OrderBy::Count => { | ||||||
|  |                     values.sort_unstable_by(|_, left, _, right| { | ||||||
|  |                         left.cmp(right) | ||||||
|  |                             // biggest first | ||||||
|  |                             .reverse() | ||||||
|  |                     }) | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             if let Some(max_values_per_facet) = max_values_per_facet { | ||||||
|  |                 values.truncate(max_values_per_facet) | ||||||
|  |             }; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Some(ComputedFacets { distribution, stats }) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub fn perform_federated_search( | pub fn perform_federated_search( | ||||||
|     index_scheduler: &IndexScheduler, |     index_scheduler: &IndexScheduler, | ||||||
|     queries: Vec<SearchQueryWithIndex>, |     queries: Vec<SearchQueryWithIndex>, | ||||||
|     federation: Federation, |     mut federation: Federation, | ||||||
|     features: RoFeatures, |     features: RoFeatures, | ||||||
| ) -> Result<FederatedSearchResult, ResponseError> { | ) -> Result<FederatedSearchResult, ResponseError> { | ||||||
|     let before_search = std::time::Instant::now(); |     let before_search = std::time::Instant::now(); | ||||||
| @@ -342,6 +464,16 @@ pub fn perform_federated_search( | |||||||
|             .into()); |             .into()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|  |         if let Some(facets) = federated_query.has_facets() { | ||||||
|  |             let facets = facets.to_owned(); | ||||||
|  |             return Err(MeilisearchHttpError::FacetsInFederatedQuery( | ||||||
|  |                 query_index, | ||||||
|  |                 federated_query.index_uid.into_inner(), | ||||||
|  |                 facets, | ||||||
|  |             ) | ||||||
|  |             .into()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); |         let (index_uid, query, federation_options) = federated_query.into_index_query_federation(); | ||||||
|  |  | ||||||
|         queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { |         queries_by_index.entry(index_uid.into_inner()).or_default().push(QueryByIndex { | ||||||
| @@ -353,13 +485,24 @@ pub fn perform_federated_search( | |||||||
|  |  | ||||||
|     // 2. perform queries, merge and make hits index by index |     // 2. perform queries, merge and make hits index by index | ||||||
|     let required_hit_count = federation.limit + federation.offset; |     let required_hit_count = federation.limit + federation.offset; | ||||||
|  |  | ||||||
|     // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic |     // In step (2), semantic_hit_count will be set to Some(0) if any search kind uses semantic | ||||||
|     // Then in step (3), we'll update its value if there is any semantic search |     // Then in step (3), we'll update its value if there is any semantic search | ||||||
|     let mut semantic_hit_count = None; |     let mut semantic_hit_count = None; | ||||||
|     let mut results_by_index = Vec::with_capacity(queries_by_index.len()); |     let mut results_by_index = Vec::with_capacity(queries_by_index.len()); | ||||||
|     let mut previous_query_data: Option<(RankingRules, usize, String)> = None; |     let mut previous_query_data: Option<(RankingRules, usize, String)> = None; | ||||||
|  |  | ||||||
|  |     // remember the order and name of first index for each facet when merging with index settings | ||||||
|  |     // to detect if the order is inconsistent for a facet. | ||||||
|  |     let mut facet_order: Option<BTreeMap<String, (String, OrderBy)>> = match federation.merge_facets | ||||||
|  |     { | ||||||
|  |         Some(MergeFacets { .. }) => Some(Default::default()), | ||||||
|  |         _ => None, | ||||||
|  |     }; | ||||||
|  |  | ||||||
|     for (index_uid, queries) in queries_by_index { |     for (index_uid, queries) in queries_by_index { | ||||||
|  |         let first_query_index = queries.first().map(|query| query.query_index); | ||||||
|  |  | ||||||
|         let index = match index_scheduler.index(&index_uid) { |         let index = match index_scheduler.index(&index_uid) { | ||||||
|             Ok(index) => index, |             Ok(index) => index, | ||||||
|             Err(err) => { |             Err(err) => { | ||||||
| @@ -367,9 +510,8 @@ pub fn perform_federated_search( | |||||||
|                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but |                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||||
|                 // here the resource not found is not part of the URL. |                 // here the resource not found is not part of the URL. | ||||||
|                 err.code = StatusCode::BAD_REQUEST; |                 err.code = StatusCode::BAD_REQUEST; | ||||||
|                 if let Some(query) = queries.first() { |                 if let Some(query_index) = first_query_index { | ||||||
|                     err.message = |                     err.message = format!("Inside `.queries[{}]`: {}", query_index, err.message); | ||||||
|                         format!("Inside `.queries[{}]`: {}", query.query_index, err.message); |  | ||||||
|                 } |                 } | ||||||
|                 return Err(err); |                 return Err(err); | ||||||
|             } |             } | ||||||
| @@ -394,6 +536,23 @@ pub fn perform_federated_search( | |||||||
|         let mut used_negative_operator = false; |         let mut used_negative_operator = false; | ||||||
|         let mut candidates = RoaringBitmap::new(); |         let mut candidates = RoaringBitmap::new(); | ||||||
|  |  | ||||||
|  |         let facets_by_index = federation.facets_by_index.remove(&index_uid).flatten(); | ||||||
|  |  | ||||||
|  |         // TODO: recover the max size + facets_by_index as return value of this function so as not to ask it for all queries | ||||||
|  |         if let Err(mut error) = | ||||||
|  |             check_facet_order(&mut facet_order, &index_uid, &facets_by_index, &index, &rtxn) | ||||||
|  |         { | ||||||
|  |             error.message = format!( | ||||||
|  |                 "Inside `.federation.facetsByIndex.{index_uid}`: {error}{}", | ||||||
|  |                 if let Some(query_index) = first_query_index { | ||||||
|  |                     format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") | ||||||
|  |                 } else { | ||||||
|  |                     Default::default() | ||||||
|  |                 } | ||||||
|  |             ); | ||||||
|  |             return Err(error); | ||||||
|  |         } | ||||||
|  |  | ||||||
|         // 2.1. Compute all candidates for each query in the index |         // 2.1. Compute all candidates for each query in the index | ||||||
|         let mut results_by_query = Vec::with_capacity(queries.len()); |         let mut results_by_query = Vec::with_capacity(queries.len()); | ||||||
|  |  | ||||||
| @@ -562,34 +721,116 @@ pub fn perform_federated_search( | |||||||
|                 .collect(); |                 .collect(); | ||||||
|  |  | ||||||
|         let merged_result = merged_result?; |         let merged_result = merged_result?; | ||||||
|         results_by_index.push(SearchResultByIndex { |  | ||||||
|             hits: merged_result, |         let estimated_total_hits = candidates.len() as usize; | ||||||
|  |  | ||||||
|  |         let facets = facets_by_index | ||||||
|  |             .map(|facets_by_index| { | ||||||
|  |                 compute_facet_distribution_stats( | ||||||
|  |                     &facets_by_index, | ||||||
|  |                     &index, | ||||||
|  |                     &rtxn, | ||||||
|                     candidates, |                     candidates, | ||||||
|  |                     super::Route::MultiSearch, | ||||||
|  |                 ) | ||||||
|  |             }) | ||||||
|  |             .transpose() | ||||||
|  |             .map_err(|mut error| { | ||||||
|  |                 error.message = format!( | ||||||
|  |                     "Inside `.federation.facetsByIndex.{index_uid}`: {}{}", | ||||||
|  |                     error.message, | ||||||
|  |                     if let Some(query_index) = first_query_index { | ||||||
|  |                         format!("\n - Note: index `{index_uid}` used in `.queries[{query_index}]`") | ||||||
|  |                     } else { | ||||||
|  |                         Default::default() | ||||||
|  |                     } | ||||||
|  |                 ); | ||||||
|  |                 error | ||||||
|  |             })?; | ||||||
|  |  | ||||||
|  |         results_by_index.push(SearchResultByIndex { | ||||||
|  |             index: index_uid, | ||||||
|  |             hits: merged_result, | ||||||
|  |             estimated_total_hits, | ||||||
|             degraded, |             degraded, | ||||||
|             used_negative_operator, |             used_negative_operator, | ||||||
|  |             facets, | ||||||
|         }); |         }); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // bonus step, make sure to return an error if an index wants a non-faceted field, even if no query actually uses that index. | ||||||
|  |     for (index_uid, facets) in federation.facets_by_index { | ||||||
|  |         let index = match index_scheduler.index(&index_uid) { | ||||||
|  |             Ok(index) => index, | ||||||
|  |             Err(err) => { | ||||||
|  |                 let mut err = ResponseError::from(err); | ||||||
|  |                 // Patch the HTTP status code to 400 as it defaults to 404 for `index_not_found`, but | ||||||
|  |                 // here the resource not found is not part of the URL. | ||||||
|  |                 err.code = StatusCode::BAD_REQUEST; | ||||||
|  |                 err.message = format!( | ||||||
|  |                     "Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", | ||||||
|  |                     err.message | ||||||
|  |                 ); | ||||||
|  |                 return Err(err); | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         // Important: this is the only transaction we'll use for this index during this federated search | ||||||
|  |         let rtxn = index.read_txn()?; | ||||||
|  |  | ||||||
|  |         if let Err(mut error) = | ||||||
|  |             check_facet_order(&mut facet_order, &index_uid, &facets, &index, &rtxn) | ||||||
|  |         { | ||||||
|  |             error.message = format!( | ||||||
|  |                 "Inside `.federation.facetsByIndex.{index_uid}`: {error}\n - Note: index `{index_uid}` is not used in queries", | ||||||
|  |             ); | ||||||
|  |             return Err(error); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if let Some(facets) = facets { | ||||||
|  |             if let Err(mut error) = compute_facet_distribution_stats( | ||||||
|  |                 &facets, | ||||||
|  |                 &index, | ||||||
|  |                 &rtxn, | ||||||
|  |                 Default::default(), | ||||||
|  |                 super::Route::MultiSearch, | ||||||
|  |             ) { | ||||||
|  |                 error.message = | ||||||
|  |                     format!("Inside `.federation.facetsByIndex.{index_uid}`: {}\n - Note: index `{index_uid}` is not used in queries", error.message); | ||||||
|  |                 return Err(error); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|     // 3. merge hits and metadata across indexes |     // 3. merge hits and metadata across indexes | ||||||
|     // 3.1 merge metadata |     // 3.1 merge metadata | ||||||
|     let (estimated_total_hits, degraded, used_negative_operator) = { |     let (estimated_total_hits, degraded, used_negative_operator, facets) = { | ||||||
|         let mut estimated_total_hits = 0; |         let mut estimated_total_hits = 0; | ||||||
|         let mut degraded = false; |         let mut degraded = false; | ||||||
|         let mut used_negative_operator = false; |         let mut used_negative_operator = false; | ||||||
|  |  | ||||||
|  |         let mut facets: FederatedFacets = FederatedFacets::default(); | ||||||
|  |  | ||||||
|         for SearchResultByIndex { |         for SearchResultByIndex { | ||||||
|  |             index, | ||||||
|             hits: _, |             hits: _, | ||||||
|             candidates, |             estimated_total_hits: estimated_total_hits_by_index, | ||||||
|  |             facets: facets_by_index, | ||||||
|             degraded: degraded_by_index, |             degraded: degraded_by_index, | ||||||
|             used_negative_operator: used_negative_operator_by_index, |             used_negative_operator: used_negative_operator_by_index, | ||||||
|         } in &results_by_index |         } in &mut results_by_index | ||||||
|         { |         { | ||||||
|             estimated_total_hits += candidates.len() as usize; |             estimated_total_hits += *estimated_total_hits_by_index; | ||||||
|             degraded |= *degraded_by_index; |             degraded |= *degraded_by_index; | ||||||
|             used_negative_operator |= *used_negative_operator_by_index; |             used_negative_operator |= *used_negative_operator_by_index; | ||||||
|  |  | ||||||
|  |             let facets_by_index = std::mem::take(facets_by_index); | ||||||
|  |             let index = std::mem::take(index); | ||||||
|  |  | ||||||
|  |             facets.insert(index, facets_by_index); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         (estimated_total_hits, degraded, used_negative_operator) |         (estimated_total_hits, degraded, used_negative_operator, facets) | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     // 3.2 merge hits |     // 3.2 merge hits | ||||||
| @@ -606,6 +847,20 @@ pub fn perform_federated_search( | |||||||
|         .map(|hit| hit.hit) |         .map(|hit| hit.hit) | ||||||
|         .collect(); |         .collect(); | ||||||
|  |  | ||||||
|  |     let (facet_distribution, facet_stats, facets_by_index) = | ||||||
|  |         match federation.merge_facets.zip(facet_order) { | ||||||
|  |             Some((merge_facets, facet_order)) => { | ||||||
|  |                 let facets = facets.merge(merge_facets, facet_order); | ||||||
|  |  | ||||||
|  |                 let (facet_distribution, facet_stats) = facets | ||||||
|  |                     .map(|ComputedFacets { distribution, stats }| (distribution, stats)) | ||||||
|  |                     .unzip(); | ||||||
|  |  | ||||||
|  |                 (facet_distribution, facet_stats, FederatedFacets::default()) | ||||||
|  |             } | ||||||
|  |             None => (None, None, facets), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|     let search_result = FederatedSearchResult { |     let search_result = FederatedSearchResult { | ||||||
|         hits: merged_hits, |         hits: merged_hits, | ||||||
|         processing_time_ms: before_search.elapsed().as_millis(), |         processing_time_ms: before_search.elapsed().as_millis(), | ||||||
| @@ -617,7 +872,39 @@ pub fn perform_federated_search( | |||||||
|         semantic_hit_count, |         semantic_hit_count, | ||||||
|         degraded, |         degraded, | ||||||
|         used_negative_operator, |         used_negative_operator, | ||||||
|  |         facet_distribution, | ||||||
|  |         facet_stats, | ||||||
|  |         facets_by_index, | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     Ok(search_result) |     Ok(search_result) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | fn check_facet_order( | ||||||
|  |     facet_order: &mut Option<BTreeMap<String, (String, OrderBy)>>, | ||||||
|  |     current_index: &str, | ||||||
|  |     facets_by_index: &Option<Vec<String>>, | ||||||
|  |     index: &milli::Index, | ||||||
|  |     rtxn: &milli::heed::RoTxn<'_>, | ||||||
|  | ) -> Result<(), ResponseError> { | ||||||
|  |     if let (Some(facet_order), Some(facets_by_index)) = (facet_order, facets_by_index) { | ||||||
|  |         let index_facet_order = index.sort_facet_values_by(rtxn)?; | ||||||
|  |         for facet in facets_by_index { | ||||||
|  |             let index_facet_order = index_facet_order.get(facet); | ||||||
|  |             let (previous_index, previous_facet_order) = facet_order | ||||||
|  |                 .entry(facet.to_owned()) | ||||||
|  |                 .or_insert_with(|| (current_index.to_owned(), index_facet_order)); | ||||||
|  |             if previous_facet_order != &index_facet_order { | ||||||
|  |                 return Err(MeilisearchHttpError::InconsistentFacetOrder { | ||||||
|  |                     facet: facet.clone(), | ||||||
|  |                     previous_facet_order: *previous_facet_order, | ||||||
|  |                     previous_uid: previous_index.clone(), | ||||||
|  |                     current_uid: current_index.to_owned(), | ||||||
|  |                     index_facet_order, | ||||||
|  |                 } | ||||||
|  |                 .into()); | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     }; | ||||||
|  |     Ok(()) | ||||||
|  | } | ||||||
|   | |||||||
| @@ -441,9 +441,6 @@ pub struct SearchQueryWithIndex { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl SearchQueryWithIndex { | impl SearchQueryWithIndex { | ||||||
|     pub fn has_federation_options(&self) -> bool { |  | ||||||
|         self.federation_options.is_some() |  | ||||||
|     } |  | ||||||
|     pub fn has_pagination(&self) -> Option<&'static str> { |     pub fn has_pagination(&self) -> Option<&'static str> { | ||||||
|         if self.offset.is_some() { |         if self.offset.is_some() { | ||||||
|             Some("offset") |             Some("offset") | ||||||
| @@ -458,6 +455,10 @@ impl SearchQueryWithIndex { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     pub fn has_facets(&self) -> Option<&[String]> { | ||||||
|  |         self.facets.as_deref().filter(|v| !v.is_empty()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|     pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { |     pub fn into_index_query_federation(self) -> (IndexUid, SearchQuery, Option<FederationOptions>) { | ||||||
|         let SearchQueryWithIndex { |         let SearchQueryWithIndex { | ||||||
|             index_uid, |             index_uid, | ||||||
| @@ -987,39 +988,13 @@ pub fn perform_search( | |||||||
|         HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } |         HitsInfo::OffsetLimit { limit, offset, estimated_total_hits: number_of_hits } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     let (facet_distribution, facet_stats) = match facets { |     let (facet_distribution, facet_stats) = facets | ||||||
|         Some(ref fields) => { |         .map(move |facets| { | ||||||
|             let mut facet_distribution = index.facets_distribution(&rtxn); |             compute_facet_distribution_stats(&facets, index, &rtxn, candidates, Route::Search) | ||||||
|  |         }) | ||||||
|             let max_values_by_facet = index |         .transpose()? | ||||||
|                 .max_values_per_facet(&rtxn) |         .map(|ComputedFacets { distribution, stats }| (distribution, stats)) | ||||||
|                 .map_err(milli::Error::from)? |         .unzip(); | ||||||
|                 .map(|x| x as usize) |  | ||||||
|                 .unwrap_or(DEFAULT_VALUES_PER_FACET); |  | ||||||
|             facet_distribution.max_values_per_facet(max_values_by_facet); |  | ||||||
|  |  | ||||||
|             let sort_facet_values_by = |  | ||||||
|                 index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; |  | ||||||
|  |  | ||||||
|             if fields.iter().all(|f| f != "*") { |  | ||||||
|                 let fields: Vec<_> = |  | ||||||
|                     fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); |  | ||||||
|                 facet_distribution.facets(fields); |  | ||||||
|             } |  | ||||||
|  |  | ||||||
|             let distribution = facet_distribution |  | ||||||
|                 .candidates(candidates) |  | ||||||
|                 .default_order_by(sort_facet_values_by.get("*")) |  | ||||||
|                 .execute()?; |  | ||||||
|             let stats = facet_distribution.compute_stats()?; |  | ||||||
|             (Some(distribution), Some(stats)) |  | ||||||
|         } |  | ||||||
|         None => (None, None), |  | ||||||
|     }; |  | ||||||
|  |  | ||||||
|     let facet_stats = facet_stats.map(|stats| { |  | ||||||
|         stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect() |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     let result = SearchResult { |     let result = SearchResult { | ||||||
|         hits: documents, |         hits: documents, | ||||||
| @@ -1035,6 +1010,61 @@ pub fn perform_search( | |||||||
|     Ok(result) |     Ok(result) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, Default, Serialize)] | ||||||
|  | pub struct ComputedFacets { | ||||||
|  |     pub distribution: BTreeMap<String, IndexMap<String, u64>>, | ||||||
|  |     pub stats: BTreeMap<String, FacetStats>, | ||||||
|  | } | ||||||
|  |  | ||||||
/// Route on whose behalf facets are being computed.
///
/// Used to pick the error code reported when the facet distribution request is invalid.
enum Route {
    /// Facets are computed for a single-index search.
    Search,
    /// Facets are computed for a multi-search request.
    MultiSearch,
}
|  |  | ||||||
|  | fn compute_facet_distribution_stats<S: AsRef<str>>( | ||||||
|  |     facets: &[S], | ||||||
|  |     index: &Index, | ||||||
|  |     rtxn: &RoTxn, | ||||||
|  |     candidates: roaring::RoaringBitmap, | ||||||
|  |     route: Route, | ||||||
|  | ) -> Result<ComputedFacets, ResponseError> { | ||||||
|  |     let mut facet_distribution = index.facets_distribution(rtxn); | ||||||
|  |  | ||||||
|  |     let max_values_by_facet = index | ||||||
|  |         .max_values_per_facet(rtxn) | ||||||
|  |         .map_err(milli::Error::from)? | ||||||
|  |         .map(|x| x as usize) | ||||||
|  |         .unwrap_or(DEFAULT_VALUES_PER_FACET); | ||||||
|  |  | ||||||
|  |     facet_distribution.max_values_per_facet(max_values_by_facet); | ||||||
|  |  | ||||||
|  |     let sort_facet_values_by = index.sort_facet_values_by(rtxn).map_err(milli::Error::from)?; | ||||||
|  |  | ||||||
|  |     // add specific facet if there is no placeholder | ||||||
|  |     if facets.iter().all(|f| f.as_ref() != "*") { | ||||||
|  |         let fields: Vec<_> = | ||||||
|  |             facets.iter().map(|n| (n, sort_facet_values_by.get(n.as_ref()))).collect(); | ||||||
|  |         facet_distribution.facets(fields); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     let distribution = facet_distribution | ||||||
|  |         .candidates(candidates) | ||||||
|  |         .default_order_by(sort_facet_values_by.get("*")) | ||||||
|  |         .execute() | ||||||
|  |         .map_err(|error| match (error, route) { | ||||||
|  |             ( | ||||||
|  |                 error @ milli::Error::UserError(milli::UserError::InvalidFacetsDistribution { | ||||||
|  |                     .. | ||||||
|  |                 }), | ||||||
|  |                 Route::MultiSearch, | ||||||
|  |             ) => ResponseError::from_msg(error.to_string(), Code::InvalidMultiSearchFacets), | ||||||
|  |             (error, _) => error.into(), | ||||||
|  |         })?; | ||||||
|  |     let stats = facet_distribution.compute_stats()?; | ||||||
|  |     let stats = stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect(); | ||||||
|  |     Ok(ComputedFacets { distribution, stats }) | ||||||
|  | } | ||||||
|  |  | ||||||
| pub fn search_from_kind( | pub fn search_from_kind( | ||||||
|     search_kind: SearchKind, |     search_kind: SearchKind, | ||||||
|     search: milli::Search<'_>, |     search: milli::Search<'_>, | ||||||
|   | |||||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,4 +1,5 @@ | |||||||
| use std::collections::{BTreeMap, HashMap, HashSet}; | use std::collections::{BTreeMap, HashMap, HashSet}; | ||||||
|  | use std::fmt::Display; | ||||||
| use std::ops::ControlFlow; | use std::ops::ControlFlow; | ||||||
| use std::{fmt, mem}; | use std::{fmt, mem}; | ||||||
|  |  | ||||||
| @@ -37,6 +38,15 @@ pub enum OrderBy { | |||||||
|     Count, |     Count, | ||||||
| } | } | ||||||
|  |  | ||||||
|  | impl Display for OrderBy { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         match self { | ||||||
|  |             OrderBy::Lexicographic => f.write_str("alphabetically"), | ||||||
|  |             OrderBy::Count => f.write_str("by count"), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| pub struct FacetDistribution<'a> { | pub struct FacetDistribution<'a> { | ||||||
|     facets: Option<HashMap<String, OrderBy>>, |     facets: Option<HashMap<String, OrderBy>>, | ||||||
|     candidates: Option<RoaringBitmap>, |     candidates: Option<RoaringBitmap>, | ||||||
| @@ -100,7 +110,6 @@ impl<'a> FacetDistribution<'a> { | |||||||
|                 let mut lexicographic_distribution = BTreeMap::new(); |                 let mut lexicographic_distribution = BTreeMap::new(); | ||||||
|                 let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); |                 let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); | ||||||
|  |  | ||||||
|                 let distribution_prelength = distribution.len(); |  | ||||||
|                 let db = self.index.field_id_docid_facet_f64s; |                 let db = self.index.field_id_docid_facet_f64s; | ||||||
|                 for docid in candidates { |                 for docid in candidates { | ||||||
|                     key_buffer.truncate(mem::size_of::<FieldId>()); |                     key_buffer.truncate(mem::size_of::<FieldId>()); | ||||||
| @@ -113,23 +122,21 @@ impl<'a> FacetDistribution<'a> { | |||||||
|                     for result in iter { |                     for result in iter { | ||||||
|                         let ((_, _, value), ()) = result?; |                         let ((_, _, value), ()) = result?; | ||||||
|                         *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; |                         *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1; | ||||||
|  |  | ||||||
|                         if lexicographic_distribution.len() - distribution_prelength |  | ||||||
|                             == self.max_values_per_facet |  | ||||||
|                         { |  | ||||||
|                             break; |  | ||||||
|                         } |  | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 distribution.extend(lexicographic_distribution); |                 distribution.extend( | ||||||
|  |                     lexicographic_distribution | ||||||
|  |                         .into_iter() | ||||||
|  |                         .take(self.max_values_per_facet.saturating_sub(distribution.len())), | ||||||
|  |                 ); | ||||||
|             } |             } | ||||||
|             FacetType::String => { |             FacetType::String => { | ||||||
|                 let mut normalized_distribution = BTreeMap::new(); |                 let mut normalized_distribution = BTreeMap::new(); | ||||||
|                 let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); |                 let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec(); | ||||||
|  |  | ||||||
|                 let db = self.index.field_id_docid_facet_strings; |                 let db = self.index.field_id_docid_facet_strings; | ||||||
|                 'outer: for docid in candidates { |                 for docid in candidates { | ||||||
|                     key_buffer.truncate(mem::size_of::<FieldId>()); |                     key_buffer.truncate(mem::size_of::<FieldId>()); | ||||||
|                     key_buffer.extend_from_slice(&docid.to_be_bytes()); |                     key_buffer.extend_from_slice(&docid.to_be_bytes()); | ||||||
|                     let iter = db |                     let iter = db | ||||||
| @@ -144,14 +151,14 @@ impl<'a> FacetDistribution<'a> { | |||||||
|                             .or_insert_with(|| (original_value, 0)); |                             .or_insert_with(|| (original_value, 0)); | ||||||
|                         *count += 1; |                         *count += 1; | ||||||
|  |  | ||||||
|                         if normalized_distribution.len() == self.max_values_per_facet { |                         // we'd like to break here if we have enough facet values, but we are collecting them by increasing docid, | ||||||
|                             break 'outer; |                         // so higher ranked facets could be in later docids | ||||||
|                         } |  | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|  |  | ||||||
|                 let iter = normalized_distribution |                 let iter = normalized_distribution | ||||||
|                     .into_iter() |                     .into_iter() | ||||||
|  |                     .take(self.max_values_per_facet.saturating_sub(distribution.len())) | ||||||
|                     .map(|(_normalized, (original, count))| (original.to_string(), count)); |                     .map(|(_normalized, (original, count))| (original.to_string(), count)); | ||||||
|                 distribution.extend(iter); |                 distribution.extend(iter); | ||||||
|             } |             } | ||||||
| @@ -467,7 +474,7 @@ mod tests { | |||||||
|             .execute() |             .execute() | ||||||
|             .unwrap(); |             .unwrap(); | ||||||
|  |  | ||||||
|         milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 1}}"###); |         milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###); | ||||||
|  |  | ||||||
|         let map = FacetDistribution::new(&txn, &index) |         let map = FacetDistribution::new(&txn, &index) | ||||||
|             .facets(iter::once(("colour", OrderBy::Count))) |             .facets(iter::once(("colour", OrderBy::Count))) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user