mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge pull request #66 from meilisearch/show-available-facets
Expose an API to compute facets distribution
This commit is contained in:
		
							
								
								
									
										4
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										4
									
								
								Cargo.lock
									
									
									
										generated
									
									
									
								
							| @@ -1211,9 +1211,9 @@ checksum = "21215c1b9d8f7832b433255bd9eea3e2779aa55b21b2f8e13aad62c74749b237" | ||||
|  | ||||
| [[package]] | ||||
| name = "roaring" | ||||
| version = "0.6.3" | ||||
| version = "0.6.4" | ||||
| source = "registry+https://github.com/rust-lang/crates.io-index" | ||||
| checksum = "f12bdbc3b9b2fd12148ee9f97f9e36438f1e84d3ce47fec0ad6b4bfbb62b3a35" | ||||
| checksum = "4d60b41c8f25d07cecab125cb46ebbf234fc055effc61ca2392a3ef4f9422304" | ||||
| dependencies = [ | ||||
|  "byteorder", | ||||
| ] | ||||
|   | ||||
| @@ -31,7 +31,7 @@ ordered-float = "2.0.0" | ||||
| rayon = "1.3.1" | ||||
| regex = "1.4.2" | ||||
| ringtail = "0.3.0" | ||||
| roaring = "0.6.1" | ||||
| roaring = "0.6.4" | ||||
| serde = { version = "1.0", features = ["derive"] } | ||||
| serde_json = { version = "1.0.59", features = ["preserve_order"] } | ||||
| slice-group-by = "0.2.6" | ||||
|   | ||||
							
								
								
									
										627
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										627
									
								
								http-ui/Cargo.lock
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -22,6 +22,7 @@ tempfile = "3.1.0" | ||||
| askama = "0.10.1" | ||||
| askama_warp = "0.10.0" | ||||
| bytes = "0.5.6" | ||||
| either = "1.6.1" | ||||
| flate2 = "1.0.19" | ||||
| futures = "0.3.6" | ||||
| serde = { version = "1.0", features = ["derive"] } | ||||
|   | ||||
| @@ -1,9 +1,9 @@ | ||||
| var request = null; | ||||
| var timeoutID = null; | ||||
|  | ||||
| $('#query, #facet').on('input', function () { | ||||
| $('#query, #filters').on('input', function () { | ||||
|   var query = $('#query').val(); | ||||
|   var facet = $('#facet').val(); | ||||
|   var filters = $('#filters').val(); | ||||
|   var timeoutMs = 100; | ||||
|  | ||||
|   if (timeoutID !== null) { | ||||
| @@ -15,18 +15,35 @@ $('#query, #facet').on('input', function () { | ||||
|       type: "POST", | ||||
|       url: "query", | ||||
|       contentType: 'application/json', | ||||
|       data: JSON.stringify({ 'query': query, 'facetCondition': facet }), | ||||
|       data: JSON.stringify({ | ||||
|         'query': query, | ||||
|         'filters': filters, | ||||
|         "facetDistribution": true, | ||||
|       }), | ||||
|       contentType: 'application/json', | ||||
|       success: function (data, textStatus, request) { | ||||
|         results.innerHTML = ''; | ||||
|         facets.innerHTML = ''; | ||||
|  | ||||
|         let timeSpent = request.getResponseHeader('Time-Ms'); | ||||
|         let numberOfDocuments = data.length; | ||||
|         count.innerHTML = `${numberOfDocuments}`; | ||||
|         let numberOfDocuments = data.documents.length; | ||||
|         count.innerHTML = data.numberOfCandidates.toLocaleString(); | ||||
|         time.innerHTML = `${timeSpent}ms`; | ||||
|         time.classList.remove('fade-in-out'); | ||||
|  | ||||
|         for (element of data) { | ||||
|         for (facet_name in data.facets) { | ||||
|           for (value in data.facets[facet_name]) { | ||||
|               const elem = document.createElement('span'); | ||||
|               const count = data.facets[facet_name][value]; | ||||
|               elem.classList.add("tag"); | ||||
|               elem.setAttribute('data-name', facet_name); | ||||
|               elem.setAttribute('data-value', value); | ||||
|               elem.innerHTML = `${facet_name}:${value} (${count})`; | ||||
|               facets.appendChild(elem); | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         for (element of data.documents) { | ||||
|           const elem = document.createElement('li'); | ||||
|           elem.classList.add("document"); | ||||
|  | ||||
| @@ -54,6 +71,19 @@ $('#query, #facet').on('input', function () { | ||||
|           results.appendChild(elem); | ||||
|         } | ||||
|  | ||||
|         // When we click on a tag we append the facet value | ||||
|         // at the end of the facet query. | ||||
|         $('#facets .tag').on('click', function () { | ||||
|           let name = $(this).attr("data-name"); | ||||
|           let value = $(this).attr("data-value"); | ||||
|  | ||||
|           let facet_query = $('#filters').val().trim(); | ||||
|           if (facet_query === "") { | ||||
|             $('#filters').val(`${name} = "${value}"`).trigger('input'); | ||||
|           } else { | ||||
|             $('#filters').val(`${facet_query} AND ${name} = "${value}"`).trigger('input'); | ||||
|           } | ||||
|         }); | ||||
|       }, | ||||
|       beforeSend: function () { | ||||
|         if (request !== null) { | ||||
| @@ -65,6 +95,25 @@ $('#query, #facet').on('input', function () { | ||||
|   }, timeoutMs); | ||||
| }); | ||||
|  | ||||
/**
 * Returns the symmetric difference of two arrays: every element that is
 * present in exactly one of `arr1` and `arr2`, in concatenation order
 * (elements of `arr1` first, then those of `arr2`). Duplicates are kept.
 *
 * @param {Array} arr1 - first array.
 * @param {Array} arr2 - second array.
 * @returns {Array} the elements that are not shared by both arrays.
 */
function diffArray(arr1, arr2) {
  // Build the lookup sets once instead of scanning both arrays per element.
  const first = new Set(arr1);
  const second = new Set(arr2);

  // The predicate must return a boolean: returning the element itself would
  // silently drop falsy members such as 0, "" or false from the result.
  return arr1.concat(arr2).filter(function (val) {
    return !(first.has(val) && second.has(val));
  });
}
|  | ||||
/**
 * Converts an object of the form `{ facetName: [value, ...] }` into an array
 * of arrays, one inner array per facet name, where every entry is the string
 * `facetName:value`.
 *
 * @param {Object} facets_obj - maps a facet name to an iterable of values.
 * @returns {Array<Array<string>>} one list of `name:value` strings per facet.
 */
function selectedFacetsToArray(facets_obj) {
  const groups = [];
  for (const name in facets_obj) {
    const values = [...facets_obj[name]];
    groups.push(values.map((value) => `${name}:${value}`));
  }
  return groups;
}
|  | ||||
| // Make the number of documents a little bit prettier | ||||
| $('#docs-count').text(function(index, text) { | ||||
|   return parseInt(text).toLocaleString() | ||||
| @@ -75,8 +124,8 @@ $('#db-size').text(function(index, text) { | ||||
|   return filesize(parseInt(text)) | ||||
| }); | ||||
|  | ||||
| // We trigger the input when we load the script, this way | ||||
| // we execute a placeholder search when the input is empty. | ||||
| // We trigger the input when we load the script. | ||||
| $(window).on('load', function () { | ||||
|   // We execute a placeholder search when the input is empty. | ||||
|   $('#query').trigger('input'); | ||||
| }); | ||||
|   | ||||
| @@ -4,6 +4,23 @@ | ||||
|   padding: 0; | ||||
| } | ||||
|  | ||||
| #facets .tag { | ||||
|     margin-right: 1em; | ||||
|     margin-bottom: 1em; | ||||
| } | ||||
|  | ||||
| #facets { | ||||
|     max-width: 900px; | ||||
|     margin: 20px auto 0 auto; | ||||
|     padding: 0; | ||||
|     max-height: 16em; | ||||
|   overflow: scroll; | ||||
| } | ||||
|  | ||||
| #facets .tag:hover { | ||||
|   cursor: pointer; | ||||
| } | ||||
|  | ||||
| #logo-white { | ||||
|   display: none; | ||||
| } | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| use std::collections::{HashMap, HashSet}; | ||||
| use std::collections::{BTreeMap, HashMap, HashSet}; | ||||
| use std::fmt::Display; | ||||
| use std::fs::{File, create_dir_all}; | ||||
| use std::net::SocketAddr; | ||||
| @@ -11,6 +11,7 @@ use std::{mem, io}; | ||||
|  | ||||
| use askama_warp::Template; | ||||
| use byte_unit::Byte; | ||||
| use either::Either; | ||||
| use flate2::read::GzDecoder; | ||||
| use futures::stream; | ||||
| use futures::{FutureExt, StreamExt}; | ||||
| @@ -28,6 +29,7 @@ use warp::filters::ws::Message; | ||||
| use warp::{Filter, http::Response}; | ||||
| use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; | ||||
|  | ||||
| use milli::facet::FacetValue; | ||||
| use milli::update::UpdateIndexingStep::*; | ||||
| use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat}; | ||||
| use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition}; | ||||
| @@ -620,12 +622,38 @@ async fn main() -> anyhow::Result<()> { | ||||
|             .body(include_str!("../public/logo-black.svg")) | ||||
|         ); | ||||
|  | ||||
|     #[derive(Debug, Deserialize)] | ||||
|     #[serde(untagged)] | ||||
|     enum UntaggedEither<L, R> { | ||||
|         Left(L), | ||||
|         Right(R), | ||||
|     } | ||||
|  | ||||
|     impl<L, R> From<UntaggedEither<L, R>> for Either<L, R> { | ||||
|         fn from(value: UntaggedEither<L, R>) -> Either<L, R> { | ||||
|             match value { | ||||
|                 UntaggedEither::Left(left) => Either::Left(left), | ||||
|                 UntaggedEither::Right(right) => Either::Right(right), | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     #[derive(Debug, Deserialize)] | ||||
|     #[serde(deny_unknown_fields)] | ||||
|     #[serde(rename_all = "camelCase")] | ||||
|     struct QueryBody { | ||||
|         query: Option<String>, | ||||
|         facet_condition: Option<String>, | ||||
|         filters: Option<String>, | ||||
|         facet_filters: Option<Vec<UntaggedEither<Vec<String>, String>>>, | ||||
|         facet_distribution: Option<bool>, | ||||
|     } | ||||
|  | ||||
|     #[derive(Debug, Serialize)] | ||||
|     #[serde(rename_all = "camelCase")] | ||||
|     struct Answer { | ||||
|         documents: Vec<Map<String, Value>>, | ||||
|         number_of_candidates: u64, | ||||
|         facets: BTreeMap<String, BTreeMap<FacetValue, u64>>, | ||||
|     } | ||||
|  | ||||
|     let disable_highlighting = opt.disable_highlighting; | ||||
| @@ -642,14 +670,42 @@ async fn main() -> anyhow::Result<()> { | ||||
|             if let Some(query) = query.query { | ||||
|                 search.query(query); | ||||
|             } | ||||
|             if let Some(condition) = query.facet_condition { | ||||
|                 if !condition.trim().is_empty() { | ||||
|                     let condition = FacetCondition::from_str(&rtxn, &index, &condition).unwrap(); | ||||
|  | ||||
|             let filters = match query.filters { | ||||
|                 Some(condition) if !condition.trim().is_empty() => { | ||||
|                     Some(FacetCondition::from_str(&rtxn, &index, &condition).unwrap()) | ||||
|                 }, | ||||
|                 _otherwise => None, | ||||
|             }; | ||||
|  | ||||
|             let facet_filters = match query.facet_filters { | ||||
|                 Some(array) => { | ||||
|                     let eithers = array.into_iter().map(Into::into); | ||||
|                     FacetCondition::from_array(&rtxn, &index, eithers).unwrap() | ||||
|                 }, | ||||
|                 _otherwise => None, | ||||
|             }; | ||||
|  | ||||
|             let condition = match (filters, facet_filters) { | ||||
|                 (Some(filters), Some(facet_filters)) => { | ||||
|                     Some(FacetCondition::And(Box::new(filters), Box::new(facet_filters))) | ||||
|                 }, | ||||
|                 (Some(condition), None) | (None, Some(condition)) => Some(condition), | ||||
|                 _otherwise => None, | ||||
|             }; | ||||
|  | ||||
|             if let Some(condition) = condition { | ||||
|                 search.facet_condition(condition); | ||||
|             } | ||||
|             } | ||||
|  | ||||
|             let SearchResult { found_words, documents_ids } = search.execute().unwrap(); | ||||
|             let SearchResult { found_words, candidates, documents_ids } = search.execute().unwrap(); | ||||
|  | ||||
|             let number_of_candidates = candidates.len(); | ||||
|             let facets = if query.facet_distribution == Some(true) { | ||||
|                 Some(index.facets_distribution(&rtxn).candidates(candidates).execute().unwrap()) | ||||
|             } else { | ||||
|                 None | ||||
|             }; | ||||
|  | ||||
|             let mut documents = Vec::new(); | ||||
|             let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); | ||||
| @@ -674,10 +730,16 @@ async fn main() -> anyhow::Result<()> { | ||||
|                 documents.push(object); | ||||
|             } | ||||
|  | ||||
|             let answer = Answer { | ||||
|                 documents, | ||||
|                 number_of_candidates, | ||||
|                 facets: facets.unwrap_or_default(), | ||||
|             }; | ||||
|  | ||||
|             Response::builder() | ||||
|                 .header("Content-Type", "application/json") | ||||
|                 .header("Time-Ms", before_search.elapsed().as_millis().to_string()) | ||||
|                 .body(serde_json::to_string(&documents).unwrap()) | ||||
|                 .body(serde_json::to_string(&answer).unwrap()) | ||||
|         }); | ||||
|  | ||||
|     let index_cloned = index.clone(); | ||||
|   | ||||
| @@ -56,7 +56,7 @@ | ||||
|           <div class="level-item"> | ||||
|             <div class="field has-addons has-addons-right"> | ||||
|               <input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney"> | ||||
|               <input id="facet" class="input" type="text" placeholder="facet filter like released >= 1577836800"> | ||||
|               <input id="filters" class="input" type="text" placeholder="filters like released >= 1577836800"> | ||||
|             </div> | ||||
|           </div> | ||||
|           <div class="level-item"></div> | ||||
| @@ -66,7 +66,7 @@ | ||||
|         <nav class="level-right"> | ||||
|           <div class="level-item has-text-centered"> | ||||
|             <div> | ||||
|               <p class="heading">Documents</p> | ||||
|               <p class="heading">Candidates</p> | ||||
|               <p id="count" class="title">0</p> | ||||
|             </div> | ||||
|           </div> | ||||
| @@ -84,6 +84,10 @@ | ||||
|     </div> | ||||
|   </section> | ||||
|  | ||||
|   <section id="facets"> | ||||
|     <!-- facet values --> | ||||
|   </section> | ||||
|  | ||||
|   <section> | ||||
|     <ol id="results" class="content"> | ||||
|       <!-- documents matching requests --> | ||||
|   | ||||
							
								
								
									
										60
									
								
								src/facet/facet_value.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										60
									
								
								src/facet/facet_value.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,60 @@ | ||||
| use ordered_float::OrderedFloat; | ||||
| use serde::{Serialize, Serializer}; | ||||
|  | ||||
| #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] | ||||
| pub enum FacetValue { | ||||
|     String(String), | ||||
|     Float(OrderedFloat<f64>), | ||||
|     Integer(i64), | ||||
| } | ||||
|  | ||||
| impl From<String> for FacetValue { | ||||
|     fn from(string: String) -> FacetValue { | ||||
|         FacetValue::String(string) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<&str> for FacetValue { | ||||
|     fn from(string: &str) -> FacetValue { | ||||
|         FacetValue::String(string.to_owned()) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<f64> for FacetValue { | ||||
|     fn from(float: f64) -> FacetValue { | ||||
|         FacetValue::Float(OrderedFloat(float)) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<OrderedFloat<f64>> for FacetValue { | ||||
|     fn from(float: OrderedFloat<f64>) -> FacetValue { | ||||
|         FacetValue::Float(float) | ||||
|     } | ||||
| } | ||||
|  | ||||
| impl From<i64> for FacetValue { | ||||
|     fn from(integer: i64) -> FacetValue { | ||||
|         FacetValue::Integer(integer) | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// We implement Serialize ourselves because we need to always serialize it as a string, | ||||
| /// JSON object keys must be strings not numbers. | ||||
| impl Serialize for FacetValue { | ||||
|     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||||
|     where | ||||
|         S: Serializer, | ||||
|     { | ||||
|         match self { | ||||
|             FacetValue::String(string) => serializer.serialize_str(string), | ||||
|             FacetValue::Float(float) => { | ||||
|                 let string = float.to_string(); | ||||
|                 serializer.serialize_str(&string) | ||||
|             }, | ||||
|             FacetValue::Integer(integer) => { | ||||
|                 let string = integer.to_string(); | ||||
|                 serializer.serialize_str(&string) | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -1,4 +1,6 @@ | ||||
| mod facet_type; | ||||
| mod facet_value; | ||||
| pub mod value_encoding; | ||||
|  | ||||
| pub use self::facet_type::FacetType; | ||||
| pub use self::facet_value::FacetValue; | ||||
|   | ||||
| @@ -9,7 +9,7 @@ use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::fields_ids_map::FieldsIdsMap; | ||||
| use crate::{default_criteria, Criterion, Search}; | ||||
| use crate::{default_criteria, Criterion, Search, FacetDistribution}; | ||||
| use crate::{BEU32, DocumentId, FieldId, ExternalDocumentsIds}; | ||||
| use crate::{ | ||||
|     RoaringBitmapCodec, BEU32StrCodec, StrStrU8Codec, ObkvCodec, | ||||
| @@ -351,6 +351,10 @@ impl Index { | ||||
|         Ok(self.documents_ids(rtxn).map(|docids| docids.len() as usize)?) | ||||
|     } | ||||
|  | ||||
|     pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> { | ||||
|         FacetDistribution::new(rtxn, self) | ||||
|     } | ||||
|  | ||||
|     pub fn search<'a>(&'a self, rtxn: &'a RoTxn) -> Search<'a> { | ||||
|         Search::new(rtxn, self) | ||||
|     } | ||||
|   | ||||
| @@ -28,7 +28,7 @@ pub use self::fields_ids_map::FieldsIdsMap; | ||||
| pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, ObkvCodec}; | ||||
| pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; | ||||
| pub use self::index::Index; | ||||
| pub use self::search::{Search, FacetCondition, SearchResult}; | ||||
| pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult}; | ||||
| pub use self::update_store::UpdateStore; | ||||
|  | ||||
| pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>; | ||||
|   | ||||
| @@ -3,6 +3,8 @@ use std::fmt::Debug; | ||||
| use std::ops::Bound::{self, Included, Excluded}; | ||||
| use std::str::FromStr; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use either::Either; | ||||
| use heed::types::{ByteSlice, DecodeIgnore}; | ||||
| use log::debug; | ||||
| use num_traits::Bounded; | ||||
| @@ -141,6 +143,85 @@ where T: FromStr, | ||||
| } | ||||
|  | ||||
| impl FacetCondition { | ||||
|     pub fn from_array<I, J, A, B>( | ||||
|         rtxn: &heed::RoTxn, | ||||
|         index: &Index, | ||||
|         array: I, | ||||
|     ) -> anyhow::Result<Option<FacetCondition>> | ||||
|     where I: IntoIterator<Item=Either<J, B>>, | ||||
|           J: IntoIterator<Item=A>, | ||||
|           A: AsRef<str>, | ||||
|           B: AsRef<str>, | ||||
|     { | ||||
|         fn facet_condition( | ||||
|             fields_ids_map: &FieldsIdsMap, | ||||
|             faceted_fields: &HashMap<String, FacetType>, | ||||
|             key: &str, | ||||
|             value: &str, | ||||
|         ) -> anyhow::Result<FacetCondition> | ||||
|         { | ||||
|             let fid = fields_ids_map.id(key).with_context(|| { | ||||
|                 format!("{:?} isn't present in the fields ids map", key) | ||||
|             })?; | ||||
|             let ftype = faceted_fields.get(key).copied().with_context(|| { | ||||
|                 format!("{:?} isn't a faceted field", key) | ||||
|             })?; | ||||
|             let (neg, value) = match value.trim().strip_prefix('-') { | ||||
|                 Some(value) => (true, value.trim()), | ||||
|                 None => (false, value.trim()), | ||||
|             }; | ||||
|  | ||||
|             let operator = match ftype { | ||||
|                 FacetType::String => OperatorString(fid, FacetStringOperator::equal(value)), | ||||
|                 FacetType::Float => OperatorF64(fid, FacetNumberOperator::Equal(value.parse()?)), | ||||
|                 FacetType::Integer => OperatorI64(fid, FacetNumberOperator::Equal(value.parse()?)), | ||||
|             }; | ||||
|  | ||||
|             if neg { Ok(operator.negate()) } else { Ok(operator) } | ||||
|         } | ||||
|  | ||||
|         let fields_ids_map = index.fields_ids_map(rtxn)?; | ||||
|         let faceted_fields = index.faceted_fields(rtxn)?; | ||||
|         let mut ands = None; | ||||
|  | ||||
|         for either in array { | ||||
|             match either { | ||||
|                 Either::Left(array) => { | ||||
|                     let mut ors = None; | ||||
|                     for rule in array { | ||||
|                         let mut iter = rule.as_ref().splitn(2, ':'); | ||||
|                         let key = iter.next().context("missing facet condition key")?; | ||||
|                         let value = iter.next().context("missing facet condition value")?; | ||||
|                         let condition = facet_condition(&fields_ids_map, &faceted_fields, key, value)?; | ||||
|                         ors = match ors.take() { | ||||
|                             Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), | ||||
|                             None => Some(condition), | ||||
|                         }; | ||||
|                     } | ||||
|  | ||||
|                     if let Some(rule) = ors { | ||||
|                         ands = match ands.take() { | ||||
|                             Some(ands) => Some(And(Box::new(ands), Box::new(rule))), | ||||
|                             None => Some(rule), | ||||
|                         }; | ||||
|                     } | ||||
|                 }, | ||||
|                 Either::Right(rule) => { | ||||
|                     let mut iter = rule.as_ref().splitn(2, ':'); | ||||
|                     let key = iter.next().context("missing facet condition key")?; | ||||
|                     let value = iter.next().context("missing facet condition value")?; | ||||
|                     let condition = facet_condition(&fields_ids_map, &faceted_fields, key, value)?; | ||||
|                     ands = match ands.take() { | ||||
|                         Some(ands) => Some(And(Box::new(ands), Box::new(condition))), | ||||
|                         None => Some(condition), | ||||
|                     }; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(ands) | ||||
|     } | ||||
|  | ||||
|     pub fn from_str( | ||||
|         rtxn: &heed::RoTxn, | ||||
|         index: &Index, | ||||
| @@ -641,4 +722,35 @@ mod tests { | ||||
|         ); | ||||
|         assert_eq!(condition, expected); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn from_array() { | ||||
|         let path = tempfile::tempdir().unwrap(); | ||||
|         let mut options = EnvOpenOptions::new(); | ||||
|         options.map_size(10 * 1024 * 1024); // 10 MB | ||||
|         let index = Index::new(options, &path).unwrap(); | ||||
|  | ||||
|         // Set the faceted fields to be the channel. | ||||
|         let mut wtxn = index.write_txn().unwrap(); | ||||
|         let mut builder = Settings::new(&mut wtxn, &index); | ||||
|         builder.set_searchable_fields(vec!["channel".into(), "timestamp".into()]); // to keep the fields order | ||||
|         builder.set_faceted_fields(hashmap!{ | ||||
|             "channel".into() => "string".into(), | ||||
|             "timestamp".into() => "integer".into(), | ||||
|         }); | ||||
|         builder.execute(|_| ()).unwrap(); | ||||
|         wtxn.commit().unwrap(); | ||||
|  | ||||
|         // Test that the facet condition is correctly generated. | ||||
|         let rtxn = index.read_txn().unwrap(); | ||||
|         let condition = FacetCondition::from_array( | ||||
|             &rtxn, &index, | ||||
|             vec![Either::Right("channel:gotaga"), Either::Left(vec!["timestamp:44", "channel:-ponce"])], | ||||
|         ).unwrap().unwrap(); | ||||
|         let expected = FacetCondition::from_str( | ||||
|             &rtxn, &index, | ||||
|             "channel = gotaga AND (timestamp = 44 OR channel != ponce)", | ||||
|         ).unwrap(); | ||||
|         assert_eq!(condition, expected); | ||||
|     } | ||||
| } | ||||
|   | ||||
							
								
								
									
										260
									
								
								src/search/facet/facet_distribution.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										260
									
								
								src/search/facet/facet_distribution.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,260 @@ | ||||
| use std::collections::{HashSet, BTreeMap}; | ||||
| use std::ops::Bound::Unbounded; | ||||
| use std::{cmp, fmt}; | ||||
|  | ||||
| use anyhow::Context; | ||||
| use heed::BytesDecode; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::facet::{FacetType, FacetValue}; | ||||
| use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec, FacetLevelValueI64Codec}; | ||||
| use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; | ||||
| use crate::search::facet::{FacetIter, FacetRange}; | ||||
| use crate::{Index, FieldId, DocumentId}; | ||||
|  | ||||
| /// The default number of values by facets that will | ||||
| /// be fetched from the key-value store. | ||||
| const DEFAULT_VALUES_BY_FACET: usize = 100; | ||||
|  | ||||
| /// The hard limit in the number of values by facets that will be fetched from | ||||
| /// the key-value store. Searching for more values could slow down the engine. | ||||
| const MAX_VALUES_BY_FACET: usize = 1000; | ||||
|  | ||||
| /// Threshold on the number of candidates that makes | ||||
| /// the system choose between one algorithm or another. | ||||
| const CANDIDATES_THRESHOLD: u64 = 1000; | ||||
|  | ||||
| pub struct FacetDistribution<'a> { | ||||
|     facets: Option<HashSet<String>>, | ||||
|     candidates: Option<RoaringBitmap>, | ||||
|     max_values_by_facet: usize, | ||||
|     rtxn: &'a heed::RoTxn<'a>, | ||||
|     index: &'a Index, | ||||
| } | ||||
|  | ||||
| impl<'a> FacetDistribution<'a> { | ||||
|     pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> FacetDistribution<'a> { | ||||
|         FacetDistribution { | ||||
|             facets: None, | ||||
|             candidates: None, | ||||
|             max_values_by_facet: DEFAULT_VALUES_BY_FACET, | ||||
|             rtxn, | ||||
|             index, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn facets<I: IntoIterator<Item=A>, A: AsRef<str>>(&mut self, names: I) -> &mut Self { | ||||
|         self.facets = Some(names.into_iter().map(|s| s.as_ref().to_string()).collect()); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self { | ||||
|         self.candidates = Some(candidates); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self { | ||||
|         self.max_values_by_facet = cmp::min(max, MAX_VALUES_BY_FACET); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// There is a small amount of candidates OR we ask for facet string values so we | ||||
|     /// decide to iterate over the facet values of each one of them, one by one. | ||||
|     fn facet_values_from_documents( | ||||
|         &self, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|         candidates: &RoaringBitmap, | ||||
|     ) -> heed::Result<BTreeMap<FacetValue, u64>> | ||||
|     { | ||||
|         fn fetch_facet_values<'t, KC, K: 't>( | ||||
|             index: &Index, | ||||
|             rtxn: &'t heed::RoTxn, | ||||
|             field_id: FieldId, | ||||
|             candidates: &RoaringBitmap, | ||||
|         ) -> heed::Result<BTreeMap<FacetValue, u64>> | ||||
|         where | ||||
|             KC: BytesDecode<'t, DItem = (FieldId, DocumentId, K)>, | ||||
|             K: Into<FacetValue>, | ||||
|         { | ||||
|             let mut facet_values = BTreeMap::new(); | ||||
|             let mut key_buffer = vec![field_id]; | ||||
|  | ||||
|             for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) { | ||||
|                 key_buffer.truncate(1); | ||||
|                 key_buffer.extend_from_slice(&docid.to_be_bytes()); | ||||
|                 let iter = index.field_id_docid_facet_values | ||||
|                     .prefix_iter(rtxn, &key_buffer)? | ||||
|                     .remap_key_type::<KC>(); | ||||
|  | ||||
|                 for result in iter { | ||||
|                     let ((_, _, value), ()) = result?; | ||||
|                     *facet_values.entry(value.into()).or_insert(0) += 1; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             Ok(facet_values) | ||||
|         } | ||||
|  | ||||
|         let index = self.index; | ||||
|         let rtxn = self.rtxn; | ||||
|         match facet_type { | ||||
|             FacetType::String => { | ||||
|                 fetch_facet_values::<FieldDocIdFacetStringCodec, _>(index, rtxn, field_id, candidates) | ||||
|             }, | ||||
|             FacetType::Float => { | ||||
|                 fetch_facet_values::<FieldDocIdFacetF64Codec, _>(index, rtxn, field_id, candidates) | ||||
|             }, | ||||
|             FacetType::Integer => { | ||||
|                 fetch_facet_values::<FieldDocIdFacetI64Codec, _>(index, rtxn, field_id, candidates) | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Computes the distribution of a numeric facet by walking the facet levels. | ||||
|     /// | ||||
|     /// There are too many documents to fetch their facet values one by one, so we use | ||||
|     /// the facet levels to move through the facet values and find the candidates and | ||||
|     /// values associated. Each returned entry maps a facet value to the number of | ||||
|     /// `candidates` that own it; at most `max_values_by_facet` entries are returned. | ||||
|     /// | ||||
|     /// The `FacetType::String` case is treated as unreachable here: `facet_values` | ||||
|     /// always sends string facets to `facet_values_from_documents`. | ||||
|     fn facet_values_from_facet_levels( | ||||
|         &self, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|         candidates: &RoaringBitmap, | ||||
|     ) -> heed::Result<BTreeMap<FacetValue, u64>> | ||||
|     { | ||||
|         let mut facet_values = BTreeMap::new(); | ||||
|  | ||||
|         // A limit of zero means that nothing must be returned. The limit check in the | ||||
|         // loop below runs after an insertion, so without this guard it would be | ||||
|         // skipped over as soon as one value is inserted and every value would be returned. | ||||
|         if self.max_values_by_facet == 0 { | ||||
|             return Ok(facet_values); | ||||
|         } | ||||
|  | ||||
|         let iter = match facet_type { | ||||
|             FacetType::String => unreachable!(), | ||||
|             FacetType::Float => { | ||||
|                 let iter = FacetIter::<f64, FacetLevelValueF64Codec>::new_non_reducing( | ||||
|                     self.rtxn, self.index, field_id, candidates.clone(), | ||||
|                 )?; | ||||
|                 let iter = iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))); | ||||
|                 Box::new(iter) as Box::<dyn Iterator<Item=_>> | ||||
|             }, | ||||
|             FacetType::Integer => { | ||||
|                 let iter = FacetIter::<i64, FacetLevelValueI64Codec>::new_non_reducing( | ||||
|                     self.rtxn, self.index, field_id, candidates.clone(), | ||||
|                 )?; | ||||
|                 Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids)))) | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         for result in iter { | ||||
|             let (value, mut docids) = result?; | ||||
|             // Only the documents that are part of the candidates must be counted. | ||||
|             docids.intersect_with(candidates); | ||||
|             if !docids.is_empty() { | ||||
|                 facet_values.insert(value, docids.len()); | ||||
|             } | ||||
|             // Stop as soon as the requested number of facet values is reached. | ||||
|             if facet_values.len() >= self.max_values_by_facet { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(facet_values) | ||||
|     } | ||||
|  | ||||
|     /// Computes the distribution of a facet directly from the facet database. | ||||
|     /// | ||||
|     /// Placeholder search, a.k.a. no candidates were specified. We iterate through the | ||||
|     /// facet values one by one and iterate on the facet level 0 for numbers. | ||||
|     /// Each returned entry maps a facet value to the number of documents that own it; | ||||
|     /// at most `max_values_by_facet` entries are returned. | ||||
|     fn facet_values_from_raw_facet_database( | ||||
|         &self, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|     ) -> heed::Result<BTreeMap<FacetValue, u64>> | ||||
|     { | ||||
|         let mut facet_values = BTreeMap::new(); | ||||
|  | ||||
|         // A limit of zero means that nothing must be returned. The limit check in the | ||||
|         // loop below runs after an insertion, so without this guard it would never | ||||
|         // trigger and every facet value would be returned. | ||||
|         if self.max_values_by_facet == 0 { | ||||
|             return Ok(facet_values); | ||||
|         } | ||||
|  | ||||
|         let db = self.index.facet_field_id_value_docids; | ||||
|         let level = 0; | ||||
|         let iter = match facet_type { | ||||
|             FacetType::String => { | ||||
|                 let iter = db | ||||
|                     .prefix_iter(self.rtxn, &[field_id])? | ||||
|                     .remap_key_type::<FacetValueStringCodec>() | ||||
|                     .map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids))); | ||||
|                 Box::new(iter) as Box::<dyn Iterator<Item=_>> | ||||
|             }, | ||||
|             FacetType::Float => { | ||||
|                 let db = db.remap_key_type::<FacetLevelValueF64Codec>(); | ||||
|                 let range = FacetRange::<f64, _>::new( | ||||
|                     self.rtxn, db, field_id, level, Unbounded, Unbounded, | ||||
|                 )?; | ||||
|                 Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) | ||||
|             }, | ||||
|             FacetType::Integer => { | ||||
|                 let db = db.remap_key_type::<FacetLevelValueI64Codec>(); | ||||
|                 let range = FacetRange::<i64, _>::new( | ||||
|                     self.rtxn, db, field_id, level, Unbounded, Unbounded, | ||||
|                 )?; | ||||
|                 Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
|         for result in iter { | ||||
|             let (value, docids) = result?; | ||||
|             facet_values.insert(value, docids.len()); | ||||
|             // Stop as soon as the requested number of facet values is reached. | ||||
|             if facet_values.len() >= self.max_values_by_facet { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         Ok(facet_values) | ||||
|     } | ||||
|  | ||||
|     /// Picks the strategy used to compute the distribution of one facet. | ||||
|     /// | ||||
|     /// - no candidates: the raw facet database is read; | ||||
|     /// - few candidates, or a string facet: values are fetched from the documents; | ||||
|     /// - otherwise: the facet levels are walked. | ||||
|     fn facet_values( | ||||
|         &self, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|     ) -> heed::Result<BTreeMap<FacetValue, u64>> | ||||
|     { | ||||
|         match self.candidates.as_ref() { | ||||
|             // Placeholder search, there is nothing to restrict the facet values to. | ||||
|             None => self.facet_values_from_raw_facet_database(field_id, facet_type), | ||||
|             // Classic search, only the facet values related to the candidates are | ||||
|             // returned. Facet strings always go through the documents for | ||||
|             // performance reasons. | ||||
|             Some(candidates) => { | ||||
|                 let fetch_from_documents = | ||||
|                     candidates.len() <= CANDIDATES_THRESHOLD || facet_type == FacetType::String; | ||||
|                 if fetch_from_documents { | ||||
|                     self.facet_values_from_documents(field_id, facet_type, candidates) | ||||
|                 } else { | ||||
|                     self.facet_values_from_facet_levels(field_id, facet_type, candidates) | ||||
|                 } | ||||
|             }, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     /// Computes the facet distribution, returning for each facet name the map of | ||||
|     /// its facet values to their number of documents. | ||||
|     /// | ||||
|     /// When `facets` is set, only the names that are also faceted fields are kept; | ||||
|     /// otherwise every faceted field is used. An error is returned when a selected | ||||
|     /// name is missing from the fields ids map or when a database read fails. | ||||
|     pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<FacetValue, u64>>> { | ||||
|         let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|         let faceted_fields = self.index.faceted_fields(self.rtxn)?; | ||||
|  | ||||
|         let selected: Vec<_> = if let Some(names) = &self.facets { | ||||
|             names | ||||
|                 .iter() | ||||
|                 .filter_map(|name| { | ||||
|                     let facet_type = faceted_fields.get(name)?; | ||||
|                     Some((name.to_string(), *facet_type)) | ||||
|                 }) | ||||
|                 .collect() | ||||
|         } else { | ||||
|             faceted_fields.into_iter().collect() | ||||
|         }; | ||||
|  | ||||
|         // Collecting into a `Result` stops at the first error, like an early return. | ||||
|         selected | ||||
|             .into_iter() | ||||
|             .map(|(name, facet_type)| -> anyhow::Result<_> { | ||||
|                 let field_id = fields_ids_map.id(&name).with_context(|| { | ||||
|                     format!("missing field name {:?} from the fields id map", name) | ||||
|                 })?; | ||||
|                 let distribution = self.facet_values(field_id, facet_type)?; | ||||
|                 Ok((name, distribution)) | ||||
|             }) | ||||
|             .collect() | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Debug output showing the `facets`, `candidates` and `max_values_by_facet` fields; | ||||
| /// the `rtxn` and `index` fields are left out. | ||||
| impl fmt::Debug for FacetDistribution<'_> { | ||||
|     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||
|         // Destructuring without `..` makes the compiler complain when a field is | ||||
|         // added to the struct and not handled here. | ||||
|         let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self; | ||||
|  | ||||
|         let mut out = f.debug_struct("FacetDistribution"); | ||||
|         out.field("facets", facets); | ||||
|         out.field("candidates", candidates); | ||||
|         out.field("max_values_by_facet", max_values_by_facet); | ||||
|         out.finish() | ||||
|     } | ||||
| } | ||||
| @@ -13,11 +13,13 @@ use crate::heed_codec::CboRoaringBitmapCodec; | ||||
| use crate::{Index, FieldId}; | ||||
|  | ||||
| pub use self::facet_condition::{FacetCondition, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet_distribution::FacetDistribution; | ||||
|  | ||||
| mod facet_condition; | ||||
| mod facet_distribution; | ||||
| mod parser; | ||||
|  | ||||
| struct FacetRange<'t, T: 't, KC> { | ||||
| pub struct FacetRange<'t, T: 't, KC> { | ||||
|     iter: RoRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, | ||||
|     end: Bound<T>, | ||||
| } | ||||
| @@ -27,7 +29,7 @@ where | ||||
|     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||
|     T: PartialOrd + Copy + Bounded, | ||||
| { | ||||
|     fn new( | ||||
|     pub fn new( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         db: Database<KC, CboRoaringBitmapCodec>, | ||||
|         field_id: FieldId, | ||||
| @@ -78,7 +80,7 @@ where | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct FacetRevRange<'t, T: 't, KC> { | ||||
| pub struct FacetRevRange<'t, T: 't, KC> { | ||||
|     iter: RoRevRange<'t, KC, LazyDecode<CboRoaringBitmapCodec>>, | ||||
|     end: Bound<T>, | ||||
| } | ||||
| @@ -88,7 +90,7 @@ where | ||||
|     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||
|     T: PartialOrd + Copy + Bounded, | ||||
| { | ||||
|     fn new( | ||||
|     pub fn new( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         db: Database<KC, CboRoaringBitmapCodec>, | ||||
|         field_id: FieldId, | ||||
| @@ -145,6 +147,7 @@ pub struct FacetIter<'t, T: 't, KC> { | ||||
|     db: Database<KC, CboRoaringBitmapCodec>, | ||||
|     field_id: FieldId, | ||||
|     level_iters: Vec<(RoaringBitmap, Either<FacetRange<'t, T, KC>, FacetRevRange<'t, T, KC>>)>, | ||||
|     must_reduce: bool, | ||||
| } | ||||
|  | ||||
| impl<'t, T, KC> FacetIter<'t, T, KC> | ||||
| @@ -153,7 +156,10 @@ where | ||||
|     KC: for<'a> BytesEncode<'a, EItem = (FieldId, u8, T, T)>, | ||||
|     T: PartialOrd + Copy + Bounded, | ||||
| { | ||||
|     pub fn new( | ||||
|     /// Create a `FacetIter` that will iterate on the different facet entries | ||||
|     /// (facet value + documents ids) and that will reduce the given documents ids | ||||
|     /// while iterating on the different facet levels. | ||||
|     pub fn new_reducing( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         index: &'t Index, | ||||
|         field_id: FieldId, | ||||
| @@ -163,10 +169,14 @@ where | ||||
|         let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); | ||||
|         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
|         let highest_iter = FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||
|         Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Left(highest_iter))] }) | ||||
|         let level_iters = vec![(documents_ids, Left(highest_iter))]; | ||||
|         Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true }) | ||||
|     } | ||||
|  | ||||
|     pub fn new_reverse( | ||||
|     /// Create a `FacetIter` that will iterate on the different facet entries in reverse | ||||
|     /// (facet value + documents ids) and that will reduce the given documents ids | ||||
|     /// while iterating on the different facet levels. | ||||
|     pub fn new_reverse_reducing( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         index: &'t Index, | ||||
|         field_id: FieldId, | ||||
| @@ -176,7 +186,26 @@ where | ||||
|         let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); | ||||
|         let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
|         let highest_iter = FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; | ||||
|         Ok(FacetIter { rtxn, db, field_id, level_iters: vec![(documents_ids, Right(highest_iter))] }) | ||||
|         let level_iters = vec![(documents_ids, Right(highest_iter))]; | ||||
|         Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true }) | ||||
|     } | ||||
|  | ||||
|     /// Create a `FacetIter` that iterates on the facet entries (facet value + | ||||
|     /// documents ids) without reducing the given documents ids while moving through | ||||
|     /// the facet levels. A document id associated with multiple facet values can | ||||
|     /// therefore be returned multiple times. | ||||
|     pub fn new_non_reducing( | ||||
|         rtxn: &'t heed::RoTxn, | ||||
|         index: &'t Index, | ||||
|         field_id: FieldId, | ||||
|         documents_ids: RoaringBitmap, | ||||
|     ) -> heed::Result<FacetIter<'t, T, KC>> | ||||
|     { | ||||
|         let db = index.facet_field_id_value_docids.remap_key_type::<KC>(); | ||||
|         // The iteration starts from the highest level, or level 0 when none is found. | ||||
|         let top_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); | ||||
|         let top_range = FacetRange::new(rtxn, db, field_id, top_level, Unbounded, Unbounded)?; | ||||
|         Ok(FacetIter { | ||||
|             rtxn, | ||||
|             db, | ||||
|             field_id, | ||||
|             level_iters: vec![(documents_ids, Left(top_range))], | ||||
|             must_reduce: false, | ||||
|         }) | ||||
|     } | ||||
|  | ||||
|     fn highest_level<X>(rtxn: &'t heed::RoTxn, db: Database<KC, X>, fid: FieldId) -> heed::Result<Option<u8>> { | ||||
| @@ -214,7 +243,9 @@ where | ||||
|  | ||||
|                         docids.intersect_with(&documents_ids); | ||||
|                         if !docids.is_empty() { | ||||
|                             if self.must_reduce { | ||||
|                                 documents_ids.difference_with(&docids); | ||||
|                             } | ||||
|  | ||||
|                             if level == 0 { | ||||
|                                 debug!("found {:?} at {:?}",  docids, left); | ||||
|   | ||||
| @@ -20,7 +20,7 @@ use crate::mdfs::Mdfs; | ||||
| use crate::query_tokens::{query_tokens, QueryToken}; | ||||
| use crate::{Index, FieldId, DocumentId, Criterion}; | ||||
|  | ||||
| pub use self::facet::{FacetCondition, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet::{FacetIter}; | ||||
|  | ||||
| // Building these factories is not free. | ||||
| @@ -189,9 +189,9 @@ impl<'a> Search<'a> { | ||||
|                     } | ||||
|                 } else { | ||||
|                     let facet_fn = if ascending { | ||||
|                         FacetIter::<f64, FacetLevelValueF64Codec>::new | ||||
|                         FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing | ||||
|                     } else { | ||||
|                         FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse | ||||
|                         FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing | ||||
|                     }; | ||||
|                     let mut limit_tmp = limit; | ||||
|                     let mut output = Vec::new(); | ||||
| @@ -226,9 +226,9 @@ impl<'a> Search<'a> { | ||||
|                     } | ||||
|                 } else { | ||||
|                     let facet_fn = if ascending { | ||||
|                         FacetIter::<i64, FacetLevelValueI64Codec>::new | ||||
|                         FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing | ||||
|                     } else { | ||||
|                         FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse | ||||
|                         FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing | ||||
|                     }; | ||||
|                     let mut limit_tmp = limit; | ||||
|                     let mut output = Vec::new(); | ||||
| @@ -313,22 +313,26 @@ impl<'a> Search<'a> { | ||||
|                 // there is some facet conditions we return a placeholder. | ||||
|                 let documents_ids = match order_by_facet { | ||||
|                     Some((fid, ftype, is_ascending)) => { | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, facet_candidates, limit)? | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)? | ||||
|                     }, | ||||
|                     None => facet_candidates.iter().take(limit).collect(), | ||||
|                 }; | ||||
|                 return Ok(SearchResult { documents_ids, ..Default::default() }) | ||||
|                 return Ok(SearchResult { | ||||
|                     documents_ids, | ||||
|                     candidates: facet_candidates, | ||||
|                     ..Default::default() | ||||
|                 }) | ||||
|             }, | ||||
|             (None, None) => { | ||||
|                 // If the query is not set or results in no DFAs we return a placeholder. | ||||
|                 let documents_ids = self.index.documents_ids(self.rtxn)?; | ||||
|                 let all_docids = self.index.documents_ids(self.rtxn)?; | ||||
|                 let documents_ids = match order_by_facet { | ||||
|                     Some((fid, ftype, is_ascending)) => { | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, documents_ids, limit)? | ||||
|                         self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)? | ||||
|                     }, | ||||
|                     None => documents_ids.iter().take(limit).collect(), | ||||
|                     None => all_docids.iter().take(limit).collect(), | ||||
|                 }; | ||||
|                 return Ok(SearchResult { documents_ids, ..Default::default() }) | ||||
|                 return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() }) | ||||
|             }, | ||||
|         }; | ||||
|  | ||||
| @@ -336,7 +340,7 @@ impl<'a> Search<'a> { | ||||
|  | ||||
|         // The mana depth first search is a revised DFS that explore | ||||
|         // solutions in the order of their proximities. | ||||
|         let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates); | ||||
|         let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone()); | ||||
|         let mut documents = Vec::new(); | ||||
|  | ||||
|         // We execute the Mdfs iterator until we find enough documents. | ||||
| @@ -364,7 +368,7 @@ impl<'a> Search<'a> { | ||||
|             None => documents.into_iter().flatten().take(limit).collect(), | ||||
|         }; | ||||
|  | ||||
|         Ok(SearchResult { found_words, documents_ids }) | ||||
|         Ok(SearchResult { found_words, candidates, documents_ids }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -383,6 +387,7 @@ impl fmt::Debug for Search<'_> { | ||||
| #[derive(Default)] | ||||
| pub struct SearchResult { | ||||
|     pub found_words: HashSet<String>, | ||||
|     pub candidates: RoaringBitmap, | ||||
|     // TODO those documents ids should be associated with their criteria scores. | ||||
|     pub documents_ids: Vec<DocumentId>, | ||||
| } | ||||
|   | ||||
| @@ -29,6 +29,10 @@ pub struct Opt { | ||||
|  | ||||
|     /// The query string to search for (doesn't support prefix search yet). | ||||
|     query: Option<String>, | ||||
|  | ||||
|     /// Compute and print the facet distribution of all the faceted fields. | ||||
|     #[structopt(long)] | ||||
|     print_facet_distribution: bool, | ||||
| } | ||||
|  | ||||
| pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
| @@ -71,6 +75,12 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { | ||||
|             let _ = writeln!(&mut stdout); | ||||
|         } | ||||
|  | ||||
|         if opt.print_facet_distribution { | ||||
|             let facets = index.facets_distribution(&rtxn).candidates(result.candidates).execute()?; | ||||
|             serde_json::to_writer(&mut stdout, &facets)?; | ||||
|             let _ = writeln!(&mut stdout); | ||||
|         } | ||||
|  | ||||
|         debug!("Took {:.02?} to find {} documents", before.elapsed(), result.documents_ids.len()); | ||||
|     } | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user