mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-26 21:46:27 +00:00 
			
		
		
		
	Merge #4476
4476: Make the `/facet-search` route use the `sortFacetValuesBy` setting r=irevoire a=Kerollmops This PR fixes #4423 by ensuring that the `/facet-search` route uses the `sortFacetValuesBy` setting. Note for the documentation team (to be moved to the tracking issue): Using the new `sortFacetValuesBy` setting can slow down the facet-search requests as Meilisearch iterates over the whole list of facet values and computes the count of documents for every entry. That is hard, or even impossible, to optimize correctly. ### TODO - [x] Create a custom HashMap wrapper for the facet `OrderBy` settings. This wrapper will return the `OrderBy` setting of the facet; if it is not defined, it will use the default `*` one, and if that is not there either (strange), it will fall back on the lexicographic one. - [x] Create a `ValuesCollection` wrapper that implements the logic for the lexicographic and count orderings. - [x] Use it when there is no search query. - [x] Use it when there is a search query with and without allowed typos. - [x] Do not change the original logic, only use a wrapper. - [x] Add tests Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
		| @@ -671,27 +671,16 @@ pub fn perform_search( | |||||||
|  |  | ||||||
|             let sort_facet_values_by = |             let sort_facet_values_by = | ||||||
|                 index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; |                 index.sort_facet_values_by(&rtxn).map_err(milli::Error::from)?; | ||||||
|             let default_sort_facet_values_by = |  | ||||||
|                 sort_facet_values_by.get("*").copied().unwrap_or_default(); |  | ||||||
|  |  | ||||||
|             if fields.iter().all(|f| f != "*") { |             if fields.iter().all(|f| f != "*") { | ||||||
|                 let fields: Vec<_> = fields |                 let fields: Vec<_> = | ||||||
|                     .iter() |                     fields.iter().map(|n| (n, sort_facet_values_by.get(n))).collect(); | ||||||
|                     .map(|n| { |  | ||||||
|                         ( |  | ||||||
|                             n, |  | ||||||
|                             sort_facet_values_by |  | ||||||
|                                 .get(n) |  | ||||||
|                                 .copied() |  | ||||||
|                                 .unwrap_or(default_sort_facet_values_by), |  | ||||||
|                         ) |  | ||||||
|                     }) |  | ||||||
|                     .collect(); |  | ||||||
|                 facet_distribution.facets(fields); |                 facet_distribution.facets(fields); | ||||||
|             } |             } | ||||||
|  |  | ||||||
|             let distribution = facet_distribution |             let distribution = facet_distribution | ||||||
|                 .candidates(candidates) |                 .candidates(candidates) | ||||||
|                 .default_order_by(default_sort_facet_values_by) |                 .default_order_by(sort_facet_values_by.get("*")) | ||||||
|                 .execute()?; |                 .execute()?; | ||||||
|             let stats = facet_distribution.compute_stats()?; |             let stats = facet_distribution.compute_stats()?; | ||||||
|             (Some(distribution), Some(stats)) |             (Some(distribution), Some(stats)) | ||||||
|   | |||||||
| @@ -123,6 +123,28 @@ async fn simple_facet_search_with_max_values() { | |||||||
|     assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); |     assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn simple_facet_search_by_count_with_max_values() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let documents = DOCUMENTS.clone(); | ||||||
|  |     index | ||||||
|  |         .update_settings_faceting( | ||||||
|  |             json!({ "maxValuesPerFacet": 1, "sortFacetValuesBy": { "*": "count" } }), | ||||||
|  |         ) | ||||||
|  |         .await; | ||||||
|  |     index.update_settings_filterable_attributes(json!(["genres"])).await; | ||||||
|  |     index.add_documents(documents, None).await; | ||||||
|  |     index.wait_task(2).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; | ||||||
|  |  | ||||||
|  |     assert_eq!(code, 200, "{}", response); | ||||||
|  |     assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 1); | ||||||
|  | } | ||||||
|  |  | ||||||
| #[actix_rt::test] | #[actix_rt::test] | ||||||
| async fn non_filterable_facet_search_error() { | async fn non_filterable_facet_search_error() { | ||||||
|     let server = Server::new().await; |     let server = Server::new().await; | ||||||
| @@ -157,3 +179,24 @@ async fn facet_search_dont_support_words() { | |||||||
|     assert_eq!(code, 200, "{}", response); |     assert_eq!(code, 200, "{}", response); | ||||||
|     assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); |     assert_eq!(response["facetHits"].as_array().unwrap().len(), 0); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | #[actix_rt::test] | ||||||
|  | async fn simple_facet_search_with_sort_by_count() { | ||||||
|  |     let server = Server::new().await; | ||||||
|  |     let index = server.index("test"); | ||||||
|  |  | ||||||
|  |     let documents = DOCUMENTS.clone(); | ||||||
|  |     index.update_settings_faceting(json!({ "sortFacetValuesBy": { "*": "count" } })).await; | ||||||
|  |     index.update_settings_filterable_attributes(json!(["genres"])).await; | ||||||
|  |     index.add_documents(documents, None).await; | ||||||
|  |     index.wait_task(2).await; | ||||||
|  |  | ||||||
|  |     let (response, code) = | ||||||
|  |         index.facet_search(json!({"facetName": "genres", "facetQuery": "a"})).await; | ||||||
|  |  | ||||||
|  |     assert_eq!(code, 200, "{}", response); | ||||||
|  |     let hits = response["facetHits"].as_array().unwrap(); | ||||||
|  |     assert_eq!(hits.len(), 2); | ||||||
|  |     assert_eq!(hits[0], json!({ "value": "Action", "count": 3 })); | ||||||
|  |     assert_eq!(hits[1], json!({ "value": "Adventure", "count": 2 })); | ||||||
|  | } | ||||||
|   | |||||||
| @@ -20,13 +20,13 @@ use crate::heed_codec::facet::{ | |||||||
| use crate::heed_codec::{ | use crate::heed_codec::{ | ||||||
|     BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, |     BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec, | ||||||
| }; | }; | ||||||
|  | use crate::order_by_map::OrderByMap; | ||||||
| use crate::proximity::ProximityPrecision; | use crate::proximity::ProximityPrecision; | ||||||
| use crate::vector::EmbeddingConfig; | use crate::vector::EmbeddingConfig; | ||||||
| use crate::{ | use crate::{ | ||||||
|     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, |     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, | ||||||
|     FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, |     FacetDistribution, FieldDistribution, FieldId, FieldIdWordCountCodec, GeoPoint, ObkvCodec, | ||||||
|     OrderBy, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, |     Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, BEU16, BEU32, BEU64, | ||||||
|     BEU32, BEU64, |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; | pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; | ||||||
| @@ -1373,21 +1373,19 @@ impl Index { | |||||||
|         self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET) |         self.main.remap_key_type::<Str>().delete(txn, main_key::MAX_VALUES_PER_FACET) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<HashMap<String, OrderBy>> { |     pub fn sort_facet_values_by(&self, txn: &RoTxn) -> heed::Result<OrderByMap> { | ||||||
|         let mut orders = self |         let orders = self | ||||||
|             .main |             .main | ||||||
|             .remap_types::<Str, SerdeJson<HashMap<String, OrderBy>>>() |             .remap_types::<Str, SerdeJson<OrderByMap>>() | ||||||
|             .get(txn, main_key::SORT_FACET_VALUES_BY)? |             .get(txn, main_key::SORT_FACET_VALUES_BY)? | ||||||
|             .unwrap_or_default(); |             .unwrap_or_default(); | ||||||
|         // Insert the default ordering if it is not already overwritten by the user. |  | ||||||
|         orders.entry("*".to_string()).or_insert(OrderBy::Lexicographic); |  | ||||||
|         Ok(orders) |         Ok(orders) | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub(crate) fn put_sort_facet_values_by( |     pub(crate) fn put_sort_facet_values_by( | ||||||
|         &self, |         &self, | ||||||
|         txn: &mut RwTxn, |         txn: &mut RwTxn, | ||||||
|         val: &HashMap<String, OrderBy>, |         val: &OrderByMap, | ||||||
|     ) -> heed::Result<()> { |     ) -> heed::Result<()> { | ||||||
|         self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) |         self.main.remap_types::<Str, SerdeJson<_>>().put(txn, main_key::SORT_FACET_VALUES_BY, &val) | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -16,6 +16,7 @@ pub mod facet; | |||||||
| mod fields_ids_map; | mod fields_ids_map; | ||||||
| pub mod heed_codec; | pub mod heed_codec; | ||||||
| pub mod index; | pub mod index; | ||||||
|  | pub mod order_by_map; | ||||||
| pub mod prompt; | pub mod prompt; | ||||||
| pub mod proximity; | pub mod proximity; | ||||||
| pub mod score_details; | pub mod score_details; | ||||||
| @@ -56,10 +57,10 @@ pub use self::heed_codec::{ | |||||||
|     UncheckedU8StrStrCodec, |     UncheckedU8StrStrCodec, | ||||||
| }; | }; | ||||||
| pub use self::index::Index; | pub use self::index::Index; | ||||||
|  | pub use self::search::facet::{FacetValueHit, SearchForFacetValues}; | ||||||
| pub use self::search::{ | pub use self::search::{ | ||||||
|     FacetDistribution, FacetValueHit, Filter, FormatOptions, MatchBounds, MatcherBuilder, |     FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy, | ||||||
|     MatchingWords, OrderBy, Search, SearchForFacetValues, SearchResult, TermsMatchingStrategy, |     Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, | ||||||
|     DEFAULT_VALUES_PER_FACET, |  | ||||||
| }; | }; | ||||||
|  |  | ||||||
| pub type Result<T> = std::result::Result<T, error::Error>; | pub type Result<T> = std::result::Result<T, error::Error>; | ||||||
|   | |||||||
							
								
								
									
										57
									
								
								milli/src/order_by_map.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								milli/src/order_by_map.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | |||||||
|  | use std::collections::{hash_map, HashMap}; | ||||||
|  | use std::iter::FromIterator; | ||||||
|  |  | ||||||
|  | use serde::{Deserialize, Deserializer, Serialize}; | ||||||
|  |  | ||||||
|  | use crate::OrderBy; | ||||||
|  |  | ||||||
|  | #[derive(Serialize)] | ||||||
|  | pub struct OrderByMap(HashMap<String, OrderBy>); | ||||||
|  |  | ||||||
|  | impl OrderByMap { | ||||||
|  |     pub fn get(&self, key: impl AsRef<str>) -> OrderBy { | ||||||
|  |         self.0 | ||||||
|  |             .get(key.as_ref()) | ||||||
|  |             .copied() | ||||||
|  |             .unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn insert(&mut self, key: String, value: OrderBy) -> Option<OrderBy> { | ||||||
|  |         self.0.insert(key, value) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Default for OrderByMap { | ||||||
|  |     fn default() -> Self { | ||||||
|  |         let mut map = HashMap::new(); | ||||||
|  |         map.insert("*".to_string(), OrderBy::Lexicographic); | ||||||
|  |         OrderByMap(map) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl FromIterator<(String, OrderBy)> for OrderByMap { | ||||||
|  |     fn from_iter<T: IntoIterator<Item = (String, OrderBy)>>(iter: T) -> Self { | ||||||
|  |         OrderByMap(iter.into_iter().collect()) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl IntoIterator for OrderByMap { | ||||||
|  |     type Item = (String, OrderBy); | ||||||
|  |     type IntoIter = hash_map::IntoIter<String, OrderBy>; | ||||||
|  |  | ||||||
|  |     fn into_iter(self) -> Self::IntoIter { | ||||||
|  |         self.0.into_iter() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'de> Deserialize<'de> for OrderByMap { | ||||||
|  |     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> | ||||||
|  |     where | ||||||
|  |         D: Deserializer<'de>, | ||||||
|  |     { | ||||||
|  |         let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?; | ||||||
|  |         // Insert the default ordering if it is not already overwritten by the user. | ||||||
|  |         map.0.entry("*".to_string()).or_insert(OrderBy::default()); | ||||||
|  |         Ok(map) | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -6,15 +6,18 @@ use roaring::RoaringBitmap; | |||||||
|  |  | ||||||
| pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; | pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET}; | ||||||
| pub use self::filter::{BadGeoError, Filter}; | pub use self::filter::{BadGeoError, Filter}; | ||||||
|  | pub use self::search::{FacetValueHit, SearchForFacetValues}; | ||||||
| use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; | use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec}; | ||||||
| use crate::heed_codec::BytesRefCodec; | use crate::heed_codec::BytesRefCodec; | ||||||
| use crate::{Index, Result}; | use crate::{Index, Result}; | ||||||
|  |  | ||||||
| mod facet_distribution; | mod facet_distribution; | ||||||
| mod facet_distribution_iter; | mod facet_distribution_iter; | ||||||
| mod facet_range_search; | mod facet_range_search; | ||||||
| mod facet_sort_ascending; | mod facet_sort_ascending; | ||||||
| mod facet_sort_descending; | mod facet_sort_descending; | ||||||
| mod filter; | mod filter; | ||||||
|  | mod search; | ||||||
|  |  | ||||||
| fn facet_extreme_value<'t>( | fn facet_extreme_value<'t>( | ||||||
|     mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't, |     mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't, | ||||||
|   | |||||||
							
								
								
									
										326
									
								
								milli/src/search/facet/search.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										326
									
								
								milli/src/search/facet/search.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,326 @@ | |||||||
|  | use std::cmp::{Ordering, Reverse}; | ||||||
|  | use std::collections::BinaryHeap; | ||||||
|  | use std::ops::ControlFlow; | ||||||
|  |  | ||||||
|  | use charabia::normalizer::NormalizerOption; | ||||||
|  | use charabia::Normalize; | ||||||
|  | use fst::automaton::{Automaton, Str}; | ||||||
|  | use fst::{IntoStreamer, Streamer}; | ||||||
|  | use roaring::RoaringBitmap; | ||||||
|  | use tracing::error; | ||||||
|  |  | ||||||
|  | use crate::error::UserError; | ||||||
|  | use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; | ||||||
|  | use crate::search::build_dfa; | ||||||
|  | use crate::{DocumentId, FieldId, OrderBy, Result, Search}; | ||||||
|  |  | ||||||
|  | /// The maximum number of values per facet returned by the facet search route. | ||||||
|  | const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100; | ||||||
|  |  | ||||||
|  | pub struct SearchForFacetValues<'a> { | ||||||
|  |     query: Option<String>, | ||||||
|  |     facet: String, | ||||||
|  |     search_query: Search<'a>, | ||||||
|  |     max_values: usize, | ||||||
|  |     is_hybrid: bool, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl<'a> SearchForFacetValues<'a> { | ||||||
|  |     pub fn new( | ||||||
|  |         facet: String, | ||||||
|  |         search_query: Search<'a>, | ||||||
|  |         is_hybrid: bool, | ||||||
|  |     ) -> SearchForFacetValues<'a> { | ||||||
|  |         SearchForFacetValues { | ||||||
|  |             query: None, | ||||||
|  |             facet, | ||||||
|  |             search_query, | ||||||
|  |             max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, | ||||||
|  |             is_hybrid, | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn query(&mut self, query: impl Into<String>) -> &mut Self { | ||||||
|  |         self.query = Some(query.into()); | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn max_values(&mut self, max: usize) -> &mut Self { | ||||||
|  |         self.max_values = max; | ||||||
|  |         self | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn one_original_value_of( | ||||||
|  |         &self, | ||||||
|  |         field_id: FieldId, | ||||||
|  |         facet_str: &str, | ||||||
|  |         any_docid: DocumentId, | ||||||
|  |     ) -> Result<Option<String>> { | ||||||
|  |         let index = self.search_query.index; | ||||||
|  |         let rtxn = self.search_query.rtxn; | ||||||
|  |         let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); | ||||||
|  |         Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn execute(&self) -> Result<Vec<FacetValueHit>> { | ||||||
|  |         let index = self.search_query.index; | ||||||
|  |         let rtxn = self.search_query.rtxn; | ||||||
|  |  | ||||||
|  |         let filterable_fields = index.filterable_fields(rtxn)?; | ||||||
|  |         if !filterable_fields.contains(&self.facet) { | ||||||
|  |             let (valid_fields, hidden_fields) = | ||||||
|  |                 index.remove_hidden_fields(rtxn, filterable_fields)?; | ||||||
|  |  | ||||||
|  |             return Err(UserError::InvalidFacetSearchFacetName { | ||||||
|  |                 field: self.facet.clone(), | ||||||
|  |                 valid_fields, | ||||||
|  |                 hidden_fields, | ||||||
|  |             } | ||||||
|  |             .into()); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||||
|  |         let fid = match fields_ids_map.id(&self.facet) { | ||||||
|  |             Some(fid) => fid, | ||||||
|  |             // we return an empty list of results when the attribute has been | ||||||
|  |             // set as filterable but no document contains this field (yet). | ||||||
|  |             None => return Ok(Vec::new()), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { | ||||||
|  |             Some(fst) => fst, | ||||||
|  |             None => return Ok(Vec::new()), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         let search_candidates = self | ||||||
|  |             .search_query | ||||||
|  |             .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; | ||||||
|  |  | ||||||
|  |         let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) { | ||||||
|  |             OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values), | ||||||
|  |             OrderBy::Count => ValuesCollection::by_count(self.max_values), | ||||||
|  |         }; | ||||||
|  |  | ||||||
|  |         match self.query.as_ref() { | ||||||
|  |             Some(query) => { | ||||||
|  |                 let options = NormalizerOption { lossy: true, ..Default::default() }; | ||||||
|  |                 let query = query.normalize(&options); | ||||||
|  |                 let query = query.as_ref(); | ||||||
|  |  | ||||||
|  |                 let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; | ||||||
|  |                 let field_authorizes_typos = | ||||||
|  |                     !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); | ||||||
|  |  | ||||||
|  |                 if authorize_typos && field_authorizes_typos { | ||||||
|  |                     let exact_words_fst = self.search_query.index.exact_words(rtxn)?; | ||||||
|  |                     if exact_words_fst.map_or(false, |fst| fst.contains(query)) { | ||||||
|  |                         if fst.contains(query) { | ||||||
|  |                             self.fetch_original_facets_using_normalized( | ||||||
|  |                                 fid, | ||||||
|  |                                 query, | ||||||
|  |                                 query, | ||||||
|  |                                 &search_candidates, | ||||||
|  |                                 &mut results, | ||||||
|  |                             )?; | ||||||
|  |                         } | ||||||
|  |                     } else { | ||||||
|  |                         let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; | ||||||
|  |                         let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; | ||||||
|  |  | ||||||
|  |                         let is_prefix = true; | ||||||
|  |                         let automaton = if query.len() < one_typo as usize { | ||||||
|  |                             build_dfa(query, 0, is_prefix) | ||||||
|  |                         } else if query.len() < two_typos as usize { | ||||||
|  |                             build_dfa(query, 1, is_prefix) | ||||||
|  |                         } else { | ||||||
|  |                             build_dfa(query, 2, is_prefix) | ||||||
|  |                         }; | ||||||
|  |  | ||||||
|  |                         let mut stream = fst.search(automaton).into_stream(); | ||||||
|  |                         while let Some(facet_value) = stream.next() { | ||||||
|  |                             let value = std::str::from_utf8(facet_value)?; | ||||||
|  |                             if self | ||||||
|  |                                 .fetch_original_facets_using_normalized( | ||||||
|  |                                     fid, | ||||||
|  |                                     value, | ||||||
|  |                                     query, | ||||||
|  |                                     &search_candidates, | ||||||
|  |                                     &mut results, | ||||||
|  |                                 )? | ||||||
|  |                                 .is_break() | ||||||
|  |                             { | ||||||
|  |                                 break; | ||||||
|  |                             } | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } else { | ||||||
|  |                     let automaton = Str::new(query).starts_with(); | ||||||
|  |                     let mut stream = fst.search(automaton).into_stream(); | ||||||
|  |                     while let Some(facet_value) = stream.next() { | ||||||
|  |                         let value = std::str::from_utf8(facet_value)?; | ||||||
|  |                         if self | ||||||
|  |                             .fetch_original_facets_using_normalized( | ||||||
|  |                                 fid, | ||||||
|  |                                 value, | ||||||
|  |                                 query, | ||||||
|  |                                 &search_candidates, | ||||||
|  |                                 &mut results, | ||||||
|  |                             )? | ||||||
|  |                             .is_break() | ||||||
|  |                         { | ||||||
|  |                             break; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |             None => { | ||||||
|  |                 let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; | ||||||
|  |                 for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { | ||||||
|  |                     let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = | ||||||
|  |                         result?; | ||||||
|  |                     let count = search_candidates.intersection_len(&bitmap); | ||||||
|  |                     if count != 0 { | ||||||
|  |                         let value = self | ||||||
|  |                             .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? | ||||||
|  |                             .unwrap_or_else(|| left_bound.to_string()); | ||||||
|  |                         if results.insert(FacetValueHit { value, count }).is_break() { | ||||||
|  |                             break; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(results.into_sorted_vec()) | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     fn fetch_original_facets_using_normalized( | ||||||
|  |         &self, | ||||||
|  |         fid: FieldId, | ||||||
|  |         value: &str, | ||||||
|  |         query: &str, | ||||||
|  |         search_candidates: &RoaringBitmap, | ||||||
|  |         results: &mut ValuesCollection, | ||||||
|  |     ) -> Result<ControlFlow<()>> { | ||||||
|  |         let index = self.search_query.index; | ||||||
|  |         let rtxn = self.search_query.rtxn; | ||||||
|  |  | ||||||
|  |         let database = index.facet_id_normalized_string_strings; | ||||||
|  |         let key = (fid, value); | ||||||
|  |         let original_strings = match database.get(rtxn, &key)? { | ||||||
|  |             Some(original_strings) => original_strings, | ||||||
|  |             None => { | ||||||
|  |                 error!("the facet value is missing from the facet database: {key:?}"); | ||||||
|  |                 return Ok(ControlFlow::Continue(())); | ||||||
|  |             } | ||||||
|  |         }; | ||||||
|  |         for original in original_strings { | ||||||
|  |             let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; | ||||||
|  |             let docids = match index.facet_id_string_docids.get(rtxn, &key)? { | ||||||
|  |                 Some(FacetGroupValue { bitmap, .. }) => bitmap, | ||||||
|  |                 None => { | ||||||
|  |                     error!("the facet value is missing from the facet database: {key:?}"); | ||||||
|  |                     return Ok(ControlFlow::Continue(())); | ||||||
|  |                 } | ||||||
|  |             }; | ||||||
|  |             let count = search_candidates.intersection_len(&docids); | ||||||
|  |             if count != 0 { | ||||||
|  |                 let value = self | ||||||
|  |                     .one_original_value_of(fid, &original, docids.min().unwrap())? | ||||||
|  |                     .unwrap_or_else(|| query.to_string()); | ||||||
|  |                 if results.insert(FacetValueHit { value, count }).is_break() { | ||||||
|  |                     break; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         Ok(ControlFlow::Continue(())) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | #[derive(Debug, Clone, serde::Serialize, PartialEq)] | ||||||
|  | pub struct FacetValueHit { | ||||||
|  |     /// The original facet value | ||||||
|  |     pub value: String, | ||||||
|  |     /// The number of documents associated to this facet | ||||||
|  |     pub count: u64, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl PartialOrd for FacetValueHit { | ||||||
|  |     fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | ||||||
|  |         Some(self.cmp(other)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Ord for FacetValueHit { | ||||||
|  |     fn cmp(&self, other: &Self) -> Ordering { | ||||||
|  |         self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl Eq for FacetValueHit {} | ||||||
|  |  | ||||||
|  | /// A wrapper type that collects the best facet values either by | ||||||
|  | /// lexicographic order or by number of associated values. | ||||||
|  | enum ValuesCollection { | ||||||
|  |     /// Keeps the top values according to the lexicographic order. | ||||||
|  |     Lexicographic { max: usize, content: Vec<FacetValueHit> }, | ||||||
|  |     /// Keeps the top values according to the number of values associated to them. | ||||||
|  |     /// | ||||||
|  |     /// Note that it is a max heap and we need to move the smallest counts | ||||||
|  |     /// at the top to be able to pop them when we reach the max_values limit. | ||||||
|  |     Count { max: usize, content: BinaryHeap<Reverse<FacetValueHit>> }, | ||||||
|  | } | ||||||
|  |  | ||||||
|  | impl ValuesCollection { | ||||||
|  |     pub fn by_lexicographic(max: usize) -> Self { | ||||||
|  |         ValuesCollection::Lexicographic { max, content: Vec::new() } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn by_count(max: usize) -> Self { | ||||||
|  |         ValuesCollection::Count { max, content: BinaryHeap::new() } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> { | ||||||
|  |         match self { | ||||||
|  |             ValuesCollection::Lexicographic { max, content } => { | ||||||
|  |                 if content.len() < *max { | ||||||
|  |                     content.push(value); | ||||||
|  |                     if content.len() < *max { | ||||||
|  |                         return ControlFlow::Continue(()); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 ControlFlow::Break(()) | ||||||
|  |             } | ||||||
|  |             ValuesCollection::Count { max, content } => { | ||||||
|  |                 if content.len() == *max { | ||||||
|  |                     // Peeking gives us the worst value in the list as | ||||||
|  |                     // this is a max-heap and we reversed it. | ||||||
|  |                     let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) }; | ||||||
|  |                     if peek.0.count <= value.count { | ||||||
|  |                         // Replace the current worst value in the heap | ||||||
|  |                         // with the new one we received that is better. | ||||||
|  |                         *peek = Reverse(value); | ||||||
|  |                     } | ||||||
|  |                 } else { | ||||||
|  |                     content.push(Reverse(value)); | ||||||
|  |                 } | ||||||
|  |                 ControlFlow::Continue(()) | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     /// Returns the list of facet values in descending order of, either, | ||||||
|  |     /// count or lexicographic order of the value depending on the type. | ||||||
|  |     pub fn into_sorted_vec(self) -> Vec<FacetValueHit> { | ||||||
|  |         match self { | ||||||
|  |             ValuesCollection::Lexicographic { content, .. } => content.into_iter().collect(), | ||||||
|  |             ValuesCollection::Count { content, .. } => { | ||||||
|  |                 // Convert the heap into a vec of hits by removing the Reverse wrapper. | ||||||
|  |                 // Hits are already in the right order as they were reversed and there | ||||||
|  |                 // are output in ascending order. | ||||||
|  |                 content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect() | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
| @@ -1,25 +1,17 @@ | |||||||
| use std::fmt; | use std::fmt; | ||||||
| use std::ops::ControlFlow; |  | ||||||
|  |  | ||||||
| use charabia::normalizer::NormalizerOption; |  | ||||||
| use charabia::Normalize; |  | ||||||
| use fst::automaton::{Automaton, Str}; |  | ||||||
| use fst::{IntoStreamer, Streamer}; |  | ||||||
| use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; | use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; | ||||||
| use once_cell::sync::Lazy; | use once_cell::sync::Lazy; | ||||||
| use roaring::bitmap::RoaringBitmap; | use roaring::bitmap::RoaringBitmap; | ||||||
| use tracing::error; |  | ||||||
|  |  | ||||||
| pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; | pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; | ||||||
| pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; | pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; | ||||||
| use self::new::{execute_vector_search, PartialSearchResult}; | use self::new::{execute_vector_search, PartialSearchResult}; | ||||||
| use crate::error::UserError; |  | ||||||
| use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; |  | ||||||
| use crate::score_details::{ScoreDetails, ScoringStrategy}; | use crate::score_details::{ScoreDetails, ScoringStrategy}; | ||||||
| use crate::vector::DistributionShift; | use crate::vector::DistributionShift; | ||||||
| use crate::{ | use crate::{ | ||||||
|     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, FieldId, Index, |     execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, | ||||||
|     Result, SearchContext, |     SearchContext, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| // Building these factories is not free. | // Building these factories is not free. | ||||||
| @@ -27,9 +19,6 @@ static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true)); | |||||||
| static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true)); | static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true)); | ||||||
| static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true)); | static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true)); | ||||||
|  |  | ||||||
| /// The maximum number of values per facet returned by the facet search route. |  | ||||||
| const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100; |  | ||||||
|  |  | ||||||
| pub mod facet; | pub mod facet; | ||||||
| mod fst_utils; | mod fst_utils; | ||||||
| pub mod hybrid; | pub mod hybrid; | ||||||
| @@ -302,240 +291,6 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA { | |||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| pub struct SearchForFacetValues<'a> { |  | ||||||
|     query: Option<String>, |  | ||||||
|     facet: String, |  | ||||||
|     search_query: Search<'a>, |  | ||||||
|     max_values: usize, |  | ||||||
|     is_hybrid: bool, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| impl<'a> SearchForFacetValues<'a> { |  | ||||||
|     pub fn new( |  | ||||||
|         facet: String, |  | ||||||
|         search_query: Search<'a>, |  | ||||||
|         is_hybrid: bool, |  | ||||||
|     ) -> SearchForFacetValues<'a> { |  | ||||||
|         SearchForFacetValues { |  | ||||||
|             query: None, |  | ||||||
|             facet, |  | ||||||
|             search_query, |  | ||||||
|             max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, |  | ||||||
|             is_hybrid, |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn query(&mut self, query: impl Into<String>) -> &mut Self { |  | ||||||
|         self.query = Some(query.into()); |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn max_values(&mut self, max: usize) -> &mut Self { |  | ||||||
|         self.max_values = max; |  | ||||||
|         self |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn one_original_value_of( |  | ||||||
|         &self, |  | ||||||
|         field_id: FieldId, |  | ||||||
|         facet_str: &str, |  | ||||||
|         any_docid: DocumentId, |  | ||||||
|     ) -> Result<Option<String>> { |  | ||||||
|         let index = self.search_query.index; |  | ||||||
|         let rtxn = self.search_query.rtxn; |  | ||||||
|         let key: (FieldId, _, &str) = (field_id, any_docid, facet_str); |  | ||||||
|         Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned())) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     pub fn execute(&self) -> Result<Vec<FacetValueHit>> { |  | ||||||
|         let index = self.search_query.index; |  | ||||||
|         let rtxn = self.search_query.rtxn; |  | ||||||
|  |  | ||||||
|         let filterable_fields = index.filterable_fields(rtxn)?; |  | ||||||
|         if !filterable_fields.contains(&self.facet) { |  | ||||||
|             let (valid_fields, hidden_fields) = |  | ||||||
|                 index.remove_hidden_fields(rtxn, filterable_fields)?; |  | ||||||
|  |  | ||||||
|             return Err(UserError::InvalidFacetSearchFacetName { |  | ||||||
|                 field: self.facet.clone(), |  | ||||||
|                 valid_fields, |  | ||||||
|                 hidden_fields, |  | ||||||
|             } |  | ||||||
|             .into()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; |  | ||||||
|         let fid = match fields_ids_map.id(&self.facet) { |  | ||||||
|             Some(fid) => fid, |  | ||||||
|             // we return an empty list of results when the attribute has been |  | ||||||
|             // set as filterable but no document contains this field (yet). |  | ||||||
|             None => return Ok(Vec::new()), |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { |  | ||||||
|             Some(fst) => fst, |  | ||||||
|             None => return Ok(vec![]), |  | ||||||
|         }; |  | ||||||
|  |  | ||||||
|         let search_candidates = self |  | ||||||
|             .search_query |  | ||||||
|             .execute_for_candidates(self.is_hybrid || self.search_query.vector.is_some())?; |  | ||||||
|  |  | ||||||
|         match self.query.as_ref() { |  | ||||||
|             Some(query) => { |  | ||||||
|                 let options = NormalizerOption { lossy: true, ..Default::default() }; |  | ||||||
|                 let query = query.normalize(&options); |  | ||||||
|                 let query = query.as_ref(); |  | ||||||
|  |  | ||||||
|                 let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; |  | ||||||
|                 let field_authorizes_typos = |  | ||||||
|                     !self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid); |  | ||||||
|  |  | ||||||
|                 if authorize_typos && field_authorizes_typos { |  | ||||||
|                     let exact_words_fst = self.search_query.index.exact_words(rtxn)?; |  | ||||||
|                     if exact_words_fst.map_or(false, |fst| fst.contains(query)) { |  | ||||||
|                         let mut results = vec![]; |  | ||||||
|                         if fst.contains(query) { |  | ||||||
|                             self.fetch_original_facets_using_normalized( |  | ||||||
|                                 fid, |  | ||||||
|                                 query, |  | ||||||
|                                 query, |  | ||||||
|                                 &search_candidates, |  | ||||||
|                                 &mut results, |  | ||||||
|                             )?; |  | ||||||
|                         } |  | ||||||
|                         Ok(results) |  | ||||||
|                     } else { |  | ||||||
|                         let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?; |  | ||||||
|                         let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?; |  | ||||||
|  |  | ||||||
|                         let is_prefix = true; |  | ||||||
|                         let automaton = if query.len() < one_typo as usize { |  | ||||||
|                             build_dfa(query, 0, is_prefix) |  | ||||||
|                         } else if query.len() < two_typos as usize { |  | ||||||
|                             build_dfa(query, 1, is_prefix) |  | ||||||
|                         } else { |  | ||||||
|                             build_dfa(query, 2, is_prefix) |  | ||||||
|                         }; |  | ||||||
|  |  | ||||||
|                         let mut stream = fst.search(automaton).into_stream(); |  | ||||||
|                         let mut results = vec![]; |  | ||||||
|                         while let Some(facet_value) = stream.next() { |  | ||||||
|                             let value = std::str::from_utf8(facet_value)?; |  | ||||||
|                             if self |  | ||||||
|                                 .fetch_original_facets_using_normalized( |  | ||||||
|                                     fid, |  | ||||||
|                                     value, |  | ||||||
|                                     query, |  | ||||||
|                                     &search_candidates, |  | ||||||
|                                     &mut results, |  | ||||||
|                                 )? |  | ||||||
|                                 .is_break() |  | ||||||
|                             { |  | ||||||
|                                 break; |  | ||||||
|                             } |  | ||||||
|                         } |  | ||||||
|  |  | ||||||
|                         Ok(results) |  | ||||||
|                     } |  | ||||||
|                 } else { |  | ||||||
|                     let automaton = Str::new(query).starts_with(); |  | ||||||
|                     let mut stream = fst.search(automaton).into_stream(); |  | ||||||
|                     let mut results = vec![]; |  | ||||||
|                     while let Some(facet_value) = stream.next() { |  | ||||||
|                         let value = std::str::from_utf8(facet_value)?; |  | ||||||
|                         if self |  | ||||||
|                             .fetch_original_facets_using_normalized( |  | ||||||
|                                 fid, |  | ||||||
|                                 value, |  | ||||||
|                                 query, |  | ||||||
|                                 &search_candidates, |  | ||||||
|                                 &mut results, |  | ||||||
|                             )? |  | ||||||
|                             .is_break() |  | ||||||
|                         { |  | ||||||
|                             break; |  | ||||||
|                         } |  | ||||||
|                     } |  | ||||||
|  |  | ||||||
|                     Ok(results) |  | ||||||
|                 } |  | ||||||
|             } |  | ||||||
|             None => { |  | ||||||
|                 let mut results = vec![]; |  | ||||||
|                 let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" }; |  | ||||||
|                 for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? { |  | ||||||
|                     let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) = |  | ||||||
|                         result?; |  | ||||||
|                     let count = search_candidates.intersection_len(&bitmap); |  | ||||||
|                     if count != 0 { |  | ||||||
|                         let value = self |  | ||||||
|                             .one_original_value_of(fid, left_bound, bitmap.min().unwrap())? |  | ||||||
|                             .unwrap_or_else(|| left_bound.to_string()); |  | ||||||
|                         results.push(FacetValueHit { value, count }); |  | ||||||
|                     } |  | ||||||
|                     if results.len() >= self.max_values { |  | ||||||
|                         break; |  | ||||||
|                     } |  | ||||||
|                 } |  | ||||||
|                 Ok(results) |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn fetch_original_facets_using_normalized( |  | ||||||
|         &self, |  | ||||||
|         fid: FieldId, |  | ||||||
|         value: &str, |  | ||||||
|         query: &str, |  | ||||||
|         search_candidates: &RoaringBitmap, |  | ||||||
|         results: &mut Vec<FacetValueHit>, |  | ||||||
|     ) -> Result<ControlFlow<()>> { |  | ||||||
|         let index = self.search_query.index; |  | ||||||
|         let rtxn = self.search_query.rtxn; |  | ||||||
|  |  | ||||||
|         let database = index.facet_id_normalized_string_strings; |  | ||||||
|         let key = (fid, value); |  | ||||||
|         let original_strings = match database.get(rtxn, &key)? { |  | ||||||
|             Some(original_strings) => original_strings, |  | ||||||
|             None => { |  | ||||||
|                 error!("the facet value is missing from the facet database: {key:?}"); |  | ||||||
|                 return Ok(ControlFlow::Continue(())); |  | ||||||
|             } |  | ||||||
|         }; |  | ||||||
|         for original in original_strings { |  | ||||||
|             let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() }; |  | ||||||
|             let docids = match index.facet_id_string_docids.get(rtxn, &key)? { |  | ||||||
|                 Some(FacetGroupValue { bitmap, .. }) => bitmap, |  | ||||||
|                 None => { |  | ||||||
|                     error!("the facet value is missing from the facet database: {key:?}"); |  | ||||||
|                     return Ok(ControlFlow::Continue(())); |  | ||||||
|                 } |  | ||||||
|             }; |  | ||||||
|             let count = search_candidates.intersection_len(&docids); |  | ||||||
|             if count != 0 { |  | ||||||
|                 let value = self |  | ||||||
|                     .one_original_value_of(fid, &original, docids.min().unwrap())? |  | ||||||
|                     .unwrap_or_else(|| query.to_string()); |  | ||||||
|                 results.push(FacetValueHit { value, count }); |  | ||||||
|             } |  | ||||||
|             if results.len() >= self.max_values { |  | ||||||
|                 return Ok(ControlFlow::Break(())); |  | ||||||
|             } |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         Ok(ControlFlow::Continue(())) |  | ||||||
|     } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, serde::Serialize, PartialEq)] |  | ||||||
| pub struct FacetValueHit { |  | ||||||
|     /// The original facet value |  | ||||||
|     pub value: String, |  | ||||||
|     /// The number of documents associated to this facet |  | ||||||
|     pub count: u64, |  | ||||||
| } |  | ||||||
|  |  | ||||||
| #[cfg(test)] | #[cfg(test)] | ||||||
| mod test { | mod test { | ||||||
|     #[allow(unused_imports)] |     #[allow(unused_imports)] | ||||||
|   | |||||||
| @@ -14,12 +14,13 @@ use super::IndexerConfig; | |||||||
| use crate::criterion::Criterion; | use crate::criterion::Criterion; | ||||||
| use crate::error::UserError; | use crate::error::UserError; | ||||||
| use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; | ||||||
|  | use crate::order_by_map::OrderByMap; | ||||||
| use crate::proximity::ProximityPrecision; | use crate::proximity::ProximityPrecision; | ||||||
| use crate::update::index_documents::IndexDocumentsMethod; | use crate::update::index_documents::IndexDocumentsMethod; | ||||||
| use crate::update::{IndexDocuments, UpdateIndexingStep}; | use crate::update::{IndexDocuments, UpdateIndexingStep}; | ||||||
| use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; | use crate::vector::settings::{check_set, check_unset, EmbedderSource, EmbeddingSettings}; | ||||||
| use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; | use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; | ||||||
| use crate::{FieldsIdsMap, Index, OrderBy, Result}; | use crate::{FieldsIdsMap, Index, Result}; | ||||||
|  |  | ||||||
| #[derive(Debug, Clone, PartialEq, Eq, Copy)] | #[derive(Debug, Clone, PartialEq, Eq, Copy)] | ||||||
| pub enum Setting<T> { | pub enum Setting<T> { | ||||||
| @@ -145,7 +146,7 @@ pub struct Settings<'a, 't, 'i> { | |||||||
|     /// Attributes on which typo tolerance is disabled. |     /// Attributes on which typo tolerance is disabled. | ||||||
|     exact_attributes: Setting<HashSet<String>>, |     exact_attributes: Setting<HashSet<String>>, | ||||||
|     max_values_per_facet: Setting<usize>, |     max_values_per_facet: Setting<usize>, | ||||||
|     sort_facet_values_by: Setting<HashMap<String, OrderBy>>, |     sort_facet_values_by: Setting<OrderByMap>, | ||||||
|     pagination_max_total_hits: Setting<usize>, |     pagination_max_total_hits: Setting<usize>, | ||||||
|     proximity_precision: Setting<ProximityPrecision>, |     proximity_precision: Setting<ProximityPrecision>, | ||||||
|     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>, |     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>, | ||||||
| @@ -340,7 +341,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { | |||||||
|         self.max_values_per_facet = Setting::Reset; |         self.max_values_per_facet = Setting::Reset; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     pub fn set_sort_facet_values_by(&mut self, value: HashMap<String, OrderBy>) { |     pub fn set_sort_facet_values_by(&mut self, value: OrderByMap) { | ||||||
|         self.sort_facet_values_by = Setting::Set(value); |         self.sort_facet_values_by = Setting::Set(value); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user