204: Decorrelate Distinct, Asc/Desc, Filterable fields from the faceted fields r=Kerollmops a=Kerollmops

This PR decorrelates the fields that need to be stored in the facet databases (big inverted indexes for fast access) from the filterable fields. The previously named faceted fields are now named filterable fields, and the faceted fields are now the union of the distinct attribute, all the Asc/Desc criteria, and the filterable fields.

I added two tests to make sure that the engine correctly generates the faceted databases when a distinct attribute or an Asc/Desc criterion is added, and one to make sure that it is impossible to filter on a non-filterable field even if it is a faceted one.

Note that `AttributesForFacetting` has also been renamed to `FilterableAttributes`. However, it will be Transplant's job to make that change on the API; this change is only visible to users of the milli library.

- Related to https://github.com/meilisearch/transplant/issues/187.
- Fixes #161 by returning the documents that don't have the Asc/Desc field at the end of the bucket.
- Fixes #168.
- Fixes #152.

Co-authored-by: Kerollmops <clement@meilisearch.com>
Co-authored-by: Marin Postma <postma.marin@protonmail.com>
Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
bors[bot]
2021-06-02 15:43:39 +00:00
committed by GitHub
15 changed files with 390 additions and 368 deletions

View File

@@ -29,7 +29,7 @@ use tokio::sync::broadcast;
use warp::{Filter, http::Response}; use warp::{Filter, http::Response};
use warp::filters::ws::Message; use warp::filters::ws::Message;
use milli::{FacetCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore}; use milli::{FilterCondition, Index, MatchingWords, obkv_to_json, SearchResult, UpdateStore};
use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat}; use milli::update::{IndexDocumentsMethod, Setting, UpdateBuilder, UpdateFormat};
use milli::update::UpdateIndexingStep::*; use milli::update::UpdateIndexingStep::*;
@@ -251,7 +251,7 @@ struct Settings {
searchable_attributes: Setting<Vec<String>>, searchable_attributes: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
faceted_attributes: Setting<HashSet<String>>, filterable_attributes: Setting<HashSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
criteria: Setting<Vec<String>>, criteria: Setting<Vec<String>>,
@@ -420,9 +420,9 @@ async fn main() -> anyhow::Result<()> {
} }
// We transpose the settings JSON struct into a real setting update. // We transpose the settings JSON struct into a real setting update.
match settings.faceted_attributes { match settings.filterable_attributes {
Setting::Set(faceted_attributes) => builder.set_faceted_fields(faceted_attributes), Setting::Set(filterable_attributes) => builder.set_filterable_fields(filterable_attributes),
Setting::Reset => builder.reset_faceted_fields(), Setting::Reset => builder.reset_filterable_fields(),
Setting::NotSet => () Setting::NotSet => ()
} }
@@ -690,7 +690,7 @@ async fn main() -> anyhow::Result<()> {
let filters = match query.filters { let filters = match query.filters {
Some(condition) if !condition.trim().is_empty() => { Some(condition) if !condition.trim().is_empty() => {
Some(FacetCondition::from_str(&rtxn, &index, &condition).unwrap()) Some(FilterCondition::from_str(&rtxn, &index, &condition).unwrap())
} }
_otherwise => None, _otherwise => None,
}; };
@@ -698,21 +698,21 @@ async fn main() -> anyhow::Result<()> {
let facet_filters = match query.facet_filters { let facet_filters = match query.facet_filters {
Some(array) => { Some(array) => {
let eithers = array.into_iter().map(Into::into); let eithers = array.into_iter().map(Into::into);
FacetCondition::from_array(&rtxn, &index, eithers).unwrap() FilterCondition::from_array(&rtxn, &index, eithers).unwrap()
} }
_otherwise => None, _otherwise => None,
}; };
let condition = match (filters, facet_filters) { let condition = match (filters, facet_filters) {
(Some(filters), Some(facet_filters)) => { (Some(filters), Some(facet_filters)) => {
Some(FacetCondition::And(Box::new(filters), Box::new(facet_filters))) Some(FilterCondition::And(Box::new(filters), Box::new(facet_filters)))
} }
(Some(condition), None) | (None, Some(condition)) => Some(condition), (Some(condition), None) | (None, Some(condition)) => Some(condition),
_otherwise => None, _otherwise => None,
}; };
if let Some(condition) = condition { if let Some(condition) = condition {
search.facet_condition(condition); search.filter(condition);
} }
let SearchResult { matching_words, candidates, documents_ids } = search.execute().unwrap(); let SearchResult { matching_words, candidates, documents_ids } = search.execute().unwrap();
@@ -996,7 +996,7 @@ mod tests {
let settings = Settings { let settings = Settings {
displayed_attributes: Setting::Set(vec!["name".to_string()]), displayed_attributes: Setting::Set(vec!["name".to_string()]),
searchable_attributes: Setting::Set(vec!["age".to_string()]), searchable_attributes: Setting::Set(vec!["age".to_string()]),
faceted_attributes: Setting::Set(hashset!{ "age".to_string() }), filterable_attributes: Setting::Set(hashset!{ "age".to_string() }),
criteria: Setting::Set(vec!["asc(age)".to_string()]), criteria: Setting::Set(vec!["asc(age)".to_string()]),
stop_words: Setting::Set(btreeset! { "and".to_string() }), stop_words: Setting::Set(btreeset! { "and".to_string() }),
synonyms: Setting::Set(hashmap!{ "alex".to_string() => vec!["alexey".to_string()] }) synonyms: Setting::Set(hashmap!{ "alex".to_string() => vec!["alexey".to_string()] })
@@ -1047,7 +1047,7 @@ mod tests {
let settings = Settings { let settings = Settings {
displayed_attributes: Setting::Reset, displayed_attributes: Setting::Reset,
searchable_attributes: Setting::Reset, searchable_attributes: Setting::Reset,
faceted_attributes: Setting::Reset, filterable_attributes: Setting::Reset,
criteria: Setting::Reset, criteria: Setting::Reset,
stop_words: Setting::Reset, stop_words: Setting::Reset,
synonyms: Setting::Reset, synonyms: Setting::Reset,
@@ -1076,7 +1076,7 @@ mod tests {
let settings = Settings { let settings = Settings {
displayed_attributes: Setting::NotSet, displayed_attributes: Setting::NotSet,
searchable_attributes: Setting::NotSet, searchable_attributes: Setting::NotSet,
faceted_attributes: Setting::NotSet, filterable_attributes: Setting::NotSet,
criteria: Setting::NotSet, criteria: Setting::NotSet,
stop_words: Setting::NotSet, stop_words: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,

View File

@@ -1,5 +1,5 @@
use std::collections::HashSet;
use std::fmt; use std::fmt;
use std::str::FromStr;
use anyhow::{Context, bail}; use anyhow::{Context, bail};
use regex::Regex; use regex::Regex;
@@ -30,8 +30,10 @@ pub enum Criterion {
Desc(String), Desc(String),
} }
impl Criterion { impl FromStr for Criterion {
pub fn from_str(faceted_attributes: &HashSet<String>, txt: &str) -> anyhow::Result<Criterion> { type Err = anyhow::Error;
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt { match txt {
"words" => Ok(Criterion::Words), "words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo), "typo" => Ok(Criterion::Typo),
@@ -42,9 +44,6 @@ impl Criterion {
let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?; let caps = ASC_DESC_REGEX.captures(text).with_context(|| format!("unknown criterion name: {}", text))?;
let order = caps.get(1).unwrap().as_str(); let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str(); let field_name = caps.get(2).unwrap().as_str();
faceted_attributes.get(field_name).with_context(|| {
format!("Can't use {:?} as a criterion as it isn't a faceted field.", field_name)
})?;
match order { match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())), "asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())), "desc" => Ok(Criterion::Desc(field_name.to_string())),

View File

@@ -23,9 +23,9 @@ use crate::fields_ids_map::FieldsIdsMap;
pub const CRITERIA_KEY: &str = "criteria"; pub const CRITERIA_KEY: &str = "criteria";
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields"; pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
pub const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute-key"; pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids"; pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const FACETED_FIELDS_KEY: &str = "faceted-fields"; pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution"; pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map"; pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids"; pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
@@ -324,21 +324,62 @@ impl Index {
} }
} }
/* faceted fields */ /* filterable fields */
/// Writes the facet fields names in the database. /// Writes the filterable fields names in the database.
pub fn put_faceted_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> { pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
self.main.put::<_, Str, SerdeJson<_>>(wtxn, FACETED_FIELDS_KEY, fields) self.main.put::<_, Str, SerdeJson<_>>(wtxn, FILTERABLE_FIELDS_KEY, fields)
} }
/// Deletes the facet fields ids in the database. /// Deletes the filterable fields ids in the database.
pub fn delete_faceted_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub fn delete_filterable_fields(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, FACETED_FIELDS_KEY) self.main.delete::<_, Str>(wtxn, FILTERABLE_FIELDS_KEY)
} }
/// Returns the facet fields names. /// Returns the filterable fields names.
pub fn filterable_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FILTERABLE_FIELDS_KEY)?.unwrap_or_default())
}
/// Same as `filterable_fields`, but returns ids instead.
pub fn filterable_fields_ids(&self, rtxn: &RoTxn) -> heed::Result<HashSet<FieldId>> {
let filterable_fields = self.filterable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let filterable_fields = filterable_fields
.iter()
.map(|k| {
fields_ids_map
.id(k)
.ok_or_else(|| format!("{:?} should be present in the field id map", k))
.expect("corrupted data: ")
})
.collect();
Ok(filterable_fields)
}
/* faceted documents ids */
/// Returns the faceted fields names.
///
/// Faceted fields are the union of all the filterable, distinct, and Asc/Desc fields.
pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> { pub fn faceted_fields(&self, rtxn: &RoTxn) -> heed::Result<HashSet<String>> {
Ok(self.main.get::<_, Str, SerdeJson<_>>(rtxn, FACETED_FIELDS_KEY)?.unwrap_or_default()) let filterable_fields = self.filterable_fields(rtxn)?;
let distinct_field = self.distinct_field(rtxn)?;
let asc_desc_fields = self.criteria(rtxn)?
.into_iter()
.filter_map(|criterion| match criterion {
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
_otherwise => None,
});
let mut faceted_fields = filterable_fields;
faceted_fields.extend(asc_desc_fields);
if let Some(field) = distinct_field {
faceted_fields.insert(field.to_owned());
}
Ok(faceted_fields)
} }
/// Same as `faceted_fields`, but returns ids instead. /// Same as `faceted_fields`, but returns ids instead.
@@ -424,18 +465,18 @@ impl Index {
} }
} }
/* Distinct attribute */ /* distinct field */
pub(crate) fn put_distinct_attribute(&self, wtxn: &mut RwTxn, distinct_attribute: &str) -> heed::Result<()> { pub(crate) fn put_distinct_field(&self, wtxn: &mut RwTxn, distinct_field: &str) -> heed::Result<()> {
self.main.put::<_, Str, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY, distinct_attribute) self.main.put::<_, Str, Str>(wtxn, DISTINCT_FIELD_KEY, distinct_field)
} }
pub fn distinct_attribute<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> { pub fn distinct_field<'a>(&self, rtxn: &'a RoTxn) -> heed::Result<Option<&'a str>> {
self.main.get::<_, Str, Str>(rtxn, DISTINCT_ATTRIBUTE_KEY) self.main.get::<_, Str, Str>(rtxn, DISTINCT_FIELD_KEY)
} }
pub(crate) fn delete_distinct_attribute(&self, wtxn: &mut RwTxn) -> heed::Result<bool> { pub(crate) fn delete_distinct_field(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, DISTINCT_ATTRIBUTE_KEY) self.main.delete::<_, Str>(wtxn, DISTINCT_FIELD_KEY)
} }
/* criteria */ /* criteria */

View File

@@ -27,7 +27,7 @@ pub use self::heed_codec::{BEU32StrCodec, StrStrU8Codec, StrLevelPositionCodec,
pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec}; pub use self::heed_codec::{RoaringBitmapCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec};
pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec}; pub use self::heed_codec::{RoaringBitmapLenCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec};
pub use self::index::Index; pub use self::index::Index;
pub use self::search::{Search, FacetDistribution, FacetCondition, SearchResult, MatchingWords}; pub use self::search::{Search, FacetDistribution, FilterCondition, SearchResult, MatchingWords};
pub use self::tree_level::TreeLevel; pub use self::tree_level::TreeLevel;
pub use self::update_store::UpdateStore; pub use self::update_store::UpdateStore;

View File

@@ -24,6 +24,7 @@ pub struct AscDesc<'t> {
ascending: bool, ascending: bool,
query_tree: Option<Operation>, query_tree: Option<Operation>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>, candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
faceted_candidates: RoaringBitmap, faceted_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
@@ -68,6 +69,7 @@ impl<'t> AscDesc<'t> {
ascending, ascending,
query_tree: None, query_tree: None,
candidates: Box::new(std::iter::empty()), candidates: Box::new(std::iter::empty()),
allowed_candidates: RoaringBitmap::new(),
faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?, faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?,
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent, parent,
@@ -78,6 +80,9 @@ impl<'t> AscDesc<'t> {
impl<'t> Criterion for AscDesc<'t> { impl<'t> Criterion for AscDesc<'t> {
#[logging_timer::time("AscDesc::{}")] #[logging_timer::time("AscDesc::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop.
self.allowed_candidates -= params.excluded_candidates;
loop { loop {
debug!( debug!(
"Facet {}({}) iteration", "Facet {}({}) iteration",
@@ -86,18 +91,25 @@ impl<'t> Criterion for AscDesc<'t> {
); );
match self.candidates.next().transpose()? { match self.candidates.next().transpose()? {
None if !self.allowed_candidates.is_empty() => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(),
candidates: Some(take(&mut self.allowed_candidates)),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
}));
},
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => { Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
self.query_tree = query_tree; self.query_tree = query_tree;
let mut candidates = match (&self.query_tree, candidates) { let mut candidates = match (&self.query_tree, candidates) {
(_, Some(candidates)) => candidates & &self.faceted_candidates, (_, Some(candidates)) => candidates,
(Some(qt), None) => { (Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let candidates = resolve_query_tree(&context, qt, params.wdcache)?; resolve_query_tree(&context, qt, params.wdcache)?
candidates & &self.faceted_candidates
}, },
(None, None) => take(&mut self.faceted_candidates), (None, None) => self.index.documents_ids(self.rtxn)?,
}; };
if let Some(filtered_candidates) = filtered_candidates { if let Some(filtered_candidates) = filtered_candidates {
@@ -113,12 +125,13 @@ impl<'t> Criterion for AscDesc<'t> {
continue; continue;
} }
self.allowed_candidates = &candidates - params.excluded_candidates;
self.candidates = facet_ordered( self.candidates = facet_ordered(
self.index, self.index,
self.rtxn, self.rtxn,
self.field_id, self.field_id,
self.ascending, self.ascending,
candidates, candidates & &self.faceted_candidates,
)?; )?;
}, },
None => return Ok(None), None => return Ok(None),
@@ -126,6 +139,7 @@ impl<'t> Criterion for AscDesc<'t> {
}, },
Some(mut candidates) => { Some(mut candidates) => {
candidates -= params.excluded_candidates; candidates -= params.excluded_candidates;
self.allowed_candidates -= &candidates;
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),

View File

@@ -203,14 +203,14 @@ impl<'t> CriteriaBuilder<'t> {
&'t self, &'t self,
query_tree: Option<Operation>, query_tree: Option<Operation>,
primitive_query: Option<Vec<PrimitiveQueryPart>>, primitive_query: Option<Vec<PrimitiveQueryPart>>,
facet_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
) -> anyhow::Result<Final<'t>> ) -> anyhow::Result<Final<'t>>
{ {
use crate::criterion::Criterion as Name; use crate::criterion::Criterion as Name;
let primitive_query = primitive_query.unwrap_or_default(); let primitive_query = primitive_query.unwrap_or_default();
let mut criterion = Box::new(Initial::new(query_tree, facet_candidates)) as Box<dyn Criterion>; let mut criterion = Box::new(Initial::new(query_tree, filtered_candidates)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? { for name in self.index.criteria(&self.rtxn)? {
criterion = match name { criterion = match name {
Name::Typo => Box::new(Typo::new(self, criterion)), Name::Typo => Box::new(Typo::new(self, criterion)),

View File

@@ -172,7 +172,7 @@ impl DocIter for FacetDistinctIter<'_> {
} }
} }
impl<'a> Distinct<'_> for FacetDistinct<'a> { impl<'a> Distinct for FacetDistinct<'a> {
type Iter = FacetDistinctIter<'a>; type Iter = FacetDistinctIter<'a>;
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
@@ -189,8 +189,6 @@ impl<'a> Distinct<'_> for FacetDistinct<'a> {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use std::collections::HashSet;
use super::super::test::{generate_index, validate_distinct_candidates}; use super::super::test::{generate_index, validate_distinct_candidates};
use super::*; use super::*;
@@ -198,10 +196,7 @@ mod test {
($name:ident, $distinct:literal) => { ($name:ident, $distinct:literal) => {
#[test] #[test]
fn $name() { fn $name() {
use std::iter::FromIterator; let (index, fid, candidates) = generate_index($distinct);
let facets = HashSet::from_iter(Some(($distinct.to_string())));
let (index, fid, candidates) = generate_index($distinct, facets);
let txn = index.read_txn().unwrap(); let txn = index.read_txn().unwrap();
let mut map_distinct = FacetDistinct::new(fid, &index, &txn); let mut map_distinct = FacetDistinct::new(fid, &index, &txn);
let excluded = RoaringBitmap::new(); let excluded = RoaringBitmap::new();

View File

@@ -1,138 +0,0 @@
use std::collections::HashMap;
use roaring::RoaringBitmap;
use serde_json::Value;
use super::{Distinct, DocIter};
use crate::{DocumentId, FieldId, Index};
/// A distinct implementer that is backed by a `HashMap`.
///
/// Each time a document is seen, the value
/// for its distinct field is added to the map. If the map already contains an entry for this
/// value, then the document is filtered out, and is added to the excluded set.
///
/// The map is owned by this struct and only lent to the iterators it creates, so the values
/// recorded during one call to `distinct` are still known during the next one.
pub struct MapDistinct<'a> {
/// Id of the field whose values are deduplicated.
distinct: FieldId,
/// Number of times each value has been seen so far, keyed by the value's JSON text.
map: HashMap<String, usize>,
/// Index the candidate documents are read from.
index: &'a Index,
/// Read transaction used for the document lookups.
txn: &'a heed::RoTxn<'a>,
}
impl<'a> MapDistinct<'a> {
pub fn new(distinct: FieldId, index: &'a Index, txn: &'a heed::RoTxn<'a>) -> Self {
Self {
distinct,
map: HashMap::new(),
index,
txn,
}
}
}
/// The document iterator created by `MapDistinct::distinct`.
///
/// It walks the candidates and yields only the documents accepted by the distinct rule,
/// collecting the rejected ones in `excluded`.
pub struct MapDistinctIter<'a, 'b> {
/// Id of the field whose values are deduplicated.
distinct: FieldId,
/// Occurrence counters borrowed from the `MapDistinct` that created this iterator.
map: &'b mut HashMap<String, usize>,
/// Index the candidate documents are read from.
index: &'a Index,
/// Read transaction used for the document lookups.
txn: &'a heed::RoTxn<'a>,
/// Candidate document ids that have not been examined yet.
candidates: roaring::bitmap::IntoIter,
/// The excluded set given to `distinct`, plus every document rejected by this iterator.
excluded: RoaringBitmap,
}
impl<'a, 'b> MapDistinctIter<'a, 'b> {
    /// Runs the map distinct until the next accepted document is found.
    ///
    /// This helper backs `Iterator::next`, which only has to transpose its result; returning
    /// a `Result` here lets every fallible step use `?`.
    ///
    /// Returns `Ok(Some(id))` for the next accepted candidate and `Ok(None)` once all the
    /// candidates have been examined. Rejected candidates are added to the excluded set.
    fn next_inner(&mut self) -> anyhow::Result<Option<DocumentId>> {
        let seen = &mut self.map;
        // Counts one more occurrence of `value` and tells whether it was the first one.
        let mut first_occurrence = |value: Value| {
            let count = seen.entry(value.to_string()).or_insert(0);
            *count += 1;
            *count <= 1
        };

        for id in self.candidates.by_ref() {
            let document = self.index.documents(&self.txn, Some(id))?[0].1;
            let value = match document.get(self.distinct) {
                Some(bytes) => Some(serde_json::from_slice::<Value>(bytes)?),
                None => None,
            };

            let accept = match value {
                // Every element of the array must be counted, even after one of them was
                // found to be a duplicate, hence the non-short-circuiting `&`.
                Some(Value::Array(values)) => values
                    .into_iter()
                    .fold(true, |all_new, value| first_occurrence(value) & all_new),
                // Documents without a usable distinct value are always accepted.
                Some(Value::Null) | Some(Value::Object(_)) | None => true,
                Some(value) => first_occurrence(value),
            };

            if accept {
                return Ok(Some(id));
            }
            self.excluded.insert(id);
        }

        Ok(None)
    }
}
impl Iterator for MapDistinctIter<'_, '_> {
    type Item = anyhow::Result<DocumentId>;

    /// Yields the next accepted document id, or the error met while looking for it.
    ///
    /// The iteration ends when `next_inner` reports that no candidate is left.
    fn next(&mut self) -> Option<Self::Item> {
        match self.next_inner() {
            Ok(Some(id)) => Some(Ok(id)),
            Ok(None) => None,
            Err(error) => Some(Err(error)),
        }
    }
}
impl DocIter for MapDistinctIter<'_, '_> {
fn into_excluded(self) -> RoaringBitmap {
self.excluded
}
}
impl<'a, 'b> Distinct<'b> for MapDistinct<'a> {
    type Iter = MapDistinctIter<'a, 'b>;

    /// Creates an iterator over the distinct documents of `candidates`.
    ///
    /// The iterator borrows the occurrence map of `self`, and starts with `excluded` as its
    /// excluded set.
    fn distinct(&'b mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {
        let distinct = self.distinct;
        // Both are shared references, so they are simply copied into the iterator.
        let index = self.index;
        let txn = self.txn;
        let candidates = candidates.into_iter();
        let map = &mut self.map;

        MapDistinctIter { distinct, map, index, txn, candidates, excluded }
    }
}
#[cfg(test)]
mod test {
use std::collections::HashSet;
use super::*;
use super::super::test::{generate_index, validate_distinct_candidates};
// Generates a test named `$name` that runs the map distinct on the field `$distinct`.
macro_rules! test_map_distinct {
($name:ident, $distinct:literal) => {
#[test]
fn $name() {
// No faceted field is given to the index: an empty set is passed.
let (index, fid, candidates) = generate_index($distinct, HashSet::new());
let txn = index.read_txn().unwrap();
let mut map_distinct = MapDistinct::new(fid, &index, &txn);
let excluded = RoaringBitmap::new();
let mut iter = map_distinct.distinct(candidates.clone(), excluded);
// `by_ref` keeps `iter` usable so the excluded set can be taken out of it afterwards.
// NOTE(review): `validate_distinct_candidates` is defined elsewhere; it presumably checks
// the yielded documents and returns how many there were. Confirm against its definition.
let count = validate_distinct_candidates(iter.by_ref(), fid, &index);
let excluded = iter.into_excluded();
// Every candidate must have been either yielded or excluded.
assert_eq!(count as u64 + excluded.len(), candidates.len());
}
};
}
test_map_distinct!(test_string, "txt");
test_map_distinct!(test_strings, "txts");
test_map_distinct!(test_int, "cat-int");
test_map_distinct!(test_ints, "cat-ints");
}

View File

@@ -1,12 +1,10 @@
mod facet_distinct; mod facet_distinct;
mod map_distinct;
mod noop_distinct; mod noop_distinct;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::DocumentId; use crate::DocumentId;
pub use facet_distinct::FacetDistinct; pub use facet_distinct::FacetDistinct;
pub use map_distinct::MapDistinct;
pub use noop_distinct::NoopDistinct; pub use noop_distinct::NoopDistinct;
/// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`. /// A trait implemented by document iterators that are returned by calls to `Distinct::distinct`.
@@ -20,10 +18,10 @@ pub trait DocIter: Iterator<Item = anyhow::Result<DocumentId>> {
/// must return an iterator containing only distinct documents, and add the discarded documents to /// must return an iterator containing only distinct documents, and add the discarded documents to
/// the excluded set. The excluded set can later be retrieved by calling `DocIter::into_excluded` on the /// the excluded set. The excluded set can later be retrieved by calling `DocIter::into_excluded` on the
/// returned iterator. /// returned iterator.
pub trait Distinct<'a> { pub trait Distinct {
type Iter: DocIter; type Iter: DocIter;
fn distinct(&'a mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter; fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter;
} }
#[cfg(test)] #[cfg(test)]
@@ -74,17 +72,14 @@ mod test {
/// Returns a temporary index populated with random test documents, the FieldId for the /// Returns a temporary index populated with random test documents, the FieldId for the
/// distinct attribute, and the RoaringBitmap with the document ids. /// distinct attribute, and the RoaringBitmap with the document ids.
pub(crate) fn generate_index(distinct: &str, facets: HashSet<String>) -> (TempIndex, FieldId, RoaringBitmap) { pub(crate) fn generate_index(distinct: &str) -> (TempIndex, FieldId, RoaringBitmap) {
let index = TempIndex::new(); let index = TempIndex::new();
let mut txn = index.write_txn().unwrap(); let mut txn = index.write_txn().unwrap();
// set distinct and faceted attributes for the index. // set distinct and faceted attributes for the index.
let builder = UpdateBuilder::new(0); let builder = UpdateBuilder::new(0);
let mut update = builder.settings(&mut txn, &index); let mut update = builder.settings(&mut txn, &index);
update.set_distinct_attribute(distinct.to_string()); update.set_distinct_field(distinct.to_string());
if !facets.is_empty() {
update.set_faceted_fields(facets)
}
update.execute(|_, _| ()).unwrap(); update.execute(|_, _| ()).unwrap();
// add documents to the index // add documents to the index

View File

@@ -26,7 +26,7 @@ impl DocIter for NoopDistinctIter {
} }
} }
impl Distinct<'_> for NoopDistinct { impl Distinct for NoopDistinct {
type Iter = NoopDistinctIter; type Iter = NoopDistinctIter;
fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter { fn distinct(&mut self, candidates: RoaringBitmap, excluded: RoaringBitmap) -> Self::Iter {

View File

@@ -197,10 +197,10 @@ impl<'a> FacetDistribution<'a> {
pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<String, u64>>> { pub fn execute(&self) -> anyhow::Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let faceted_fields = self.index.faceted_fields(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let mut distribution = BTreeMap::new(); let mut distribution = BTreeMap::new();
for name in faceted_fields { for name in filterable_fields {
let fid = fields_ids_map.id(&name).with_context(|| { let fid = fields_ids_map.id(&name).with_context(|| {
format!("missing field name {:?} from the fields id map", name) format!("missing field name {:?} from the fields id map", name)
})?; })?;

View File

@@ -18,7 +18,7 @@ use super::FacetRange;
use super::parser::Rule; use super::parser::Rule;
use super::parser::{PREC_CLIMBER, FilterParser}; use super::parser::{PREC_CLIMBER, FilterParser};
use self::FacetCondition::*; use self::FilterCondition::*;
use self::Operator::*; use self::Operator::*;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
@@ -49,74 +49,18 @@ impl Operator {
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum FacetCondition { pub enum FilterCondition {
Operator(FieldId, Operator), Operator(FieldId, Operator),
Or(Box<Self>, Box<Self>), Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>), And(Box<Self>, Box<Self>),
} }
fn field_id( impl FilterCondition {
fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>,
items: &mut Pairs<Rule>,
) -> Result<FieldId, PestError<Rule>>
{
// lexing ensures that we at least have a key
let key = items.next().unwrap();
let field_id = match fields_ids_map.id(key.as_str()) {
Some(field_id) => field_id,
None => return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` not found, available attributes are: {}",
key.as_str(),
fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
),
},
key.as_span(),
)),
};
if !faceted_fields.contains(&field_id) {
return Err(PestError::new_from_span(
ErrorVariant::CustomError {
message: format!(
"attribute `{}` is not faceted, available faceted attributes are: {}",
key.as_str(),
faceted_fields.iter().flat_map(|id| {
fields_ids_map.name(*id)
}).collect::<Vec<_>>().join(", "),
),
},
key.as_span(),
));
}
Ok(field_id)
}
fn pest_parse<T>(pair: Pair<Rule>) -> (Result<T, pest::error::Error<Rule>>, String)
where T: FromStr,
T::Err: ToString,
{
let result = match pair.as_str().parse::<T>() {
Ok(value) => Ok(value),
Err(e) => Err(PestError::<Rule>::new_from_span(
ErrorVariant::CustomError { message: e.to_string() },
pair.as_span(),
)),
};
(result, pair.as_str().to_string())
}
impl FacetCondition {
pub fn from_array<I, J, A, B>( pub fn from_array<I, J, A, B>(
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
array: I, array: I,
) -> anyhow::Result<Option<FacetCondition>> ) -> anyhow::Result<Option<FilterCondition>>
where I: IntoIterator<Item=Either<J, B>>, where I: IntoIterator<Item=Either<J, B>>,
J: IntoIterator<Item=A>, J: IntoIterator<Item=A>,
A: AsRef<str>, A: AsRef<str>,
@@ -129,7 +73,7 @@ impl FacetCondition {
Either::Left(array) => { Either::Left(array) => {
let mut ors = None; let mut ors = None;
for rule in array { for rule in array {
let condition = FacetCondition::from_str(rtxn, index, rule.as_ref())?; let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?;
ors = match ors.take() { ors = match ors.take() {
Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), Some(ors) => Some(Or(Box::new(ors), Box::new(condition))),
None => Some(condition), None => Some(condition),
@@ -144,7 +88,7 @@ impl FacetCondition {
} }
}, },
Either::Right(rule) => { Either::Right(rule) => {
let condition = FacetCondition::from_str(rtxn, index, rule.as_ref())?; let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?;
ands = match ands.take() { ands = match ands.take() {
Some(ands) => Some(And(Box::new(ands), Box::new(condition))), Some(ands) => Some(And(Box::new(ands), Box::new(condition))),
None => Some(condition), None => Some(condition),
@@ -160,12 +104,12 @@ impl FacetCondition {
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
expression: &str, expression: &str,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let faceted_fields = index.faceted_fields_ids(rtxn)?; let filterable_fields = index.filterable_fields_ids(rtxn)?;
let lexed = FilterParser::parse(Rule::prgm, expression)?; let lexed = FilterParser::parse(Rule::prgm, expression)?;
FacetCondition::from_pairs(&fields_ids_map, &faceted_fields, lexed) FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed)
} }
fn from_pairs( fn from_pairs(
@@ -199,7 +143,7 @@ impl FacetCondition {
) )
} }
fn negate(self) -> FacetCondition { fn negate(self) -> FilterCondition {
match self { match self {
Operator(fid, op) => match op.negate() { Operator(fid, op) => match op.negate() {
(op, None) => Operator(fid, op), (op, None) => Operator(fid, op),
@@ -212,12 +156,12 @@ impl FacetCondition {
fn between( fn between(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
let (lresult, _) = pest_parse(items.next().unwrap()); let (lresult, _) = pest_parse(items.next().unwrap());
let (rresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap());
@@ -230,12 +174,12 @@ impl FacetCondition {
fn equal( fn equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, svalue) = pest_parse(value); let (result, svalue) = pest_parse(value);
@@ -246,12 +190,12 @@ impl FacetCondition {
fn greater_than( fn greater_than(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@@ -261,12 +205,12 @@ impl FacetCondition {
fn greater_than_or_equal( fn greater_than_or_equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@@ -276,12 +220,12 @@ impl FacetCondition {
fn lower_than( fn lower_than(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@@ -291,12 +235,12 @@ impl FacetCondition {
fn lower_than_or_equal( fn lower_than_or_equal(
fields_ids_map: &FieldsIdsMap, fields_ids_map: &FieldsIdsMap,
faceted_fields: &HashSet<FieldId>, filterable_fields: &HashSet<FieldId>,
item: Pair<Rule>, item: Pair<Rule>,
) -> anyhow::Result<FacetCondition> ) -> anyhow::Result<FilterCondition>
{ {
let mut items = item.into_inner(); let mut items = item.into_inner();
let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; let fid = field_id(fields_ids_map, filterable_fields, &mut items)?;
let value = items.next().unwrap(); let value = items.next().unwrap();
let (result, _svalue) = pest_parse(value); let (result, _svalue) = pest_parse(value);
@@ -305,7 +249,7 @@ impl FacetCondition {
} }
} }
impl FacetCondition { impl FilterCondition {
/// Aggregates the documents ids that are part of the specified range automatically /// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels. /// going deeper through the levels.
fn explore_facet_number_levels( fn explore_facet_number_levels(
@@ -469,6 +413,71 @@ impl FacetCondition {
} }
} }
/// Retrieves the field id based on the pest value, returns an error if
/// the field does not exist or is not filterable.
///
/// The pest pair is simply a string associated with a span, a location to highlight in
/// the error message. The next pair of `items` is consumed and used as the attribute name.
///
/// # Errors
///
/// Returns a custom pest error, located on the span of the attribute name, when:
/// - the name is not present in `fields_ids_map`, or
/// - the corresponding field id is not part of `filterable_fields`.
///
/// # Panics
///
/// Panics if `items` yields no more pairs.
fn field_id(
    fields_ids_map: &FieldsIdsMap,
    filterable_fields: &HashSet<FieldId>,
    items: &mut Pairs<Rule>,
) -> Result<FieldId, PestError<Rule>>
{
    // lexing ensures that we at least have a key
    let key = items.next().unwrap();

    let field_id = match fields_ids_map.id(key.as_str()) {
        Some(field_id) => field_id,
        None => return Err(PestError::new_from_span(
            ErrorVariant::CustomError {
                message: format!(
                    "attribute `{}` not found, available attributes are: {}",
                    key.as_str(),
                    fields_ids_map.iter().map(|(_, n)| n).collect::<Vec<_>>().join(", "),
                ),
            },
            key.as_span(),
        )),
    };

    if !filterable_fields.contains(&field_id) {
        // A `HashSet` is iterated in an arbitrary order: sort the names so that
        // the same settings always produce the same error message.
        let mut filterable_names: Vec<_> = filterable_fields
            .iter()
            .flat_map(|id| fields_ids_map.name(*id))
            .collect();
        filterable_names.sort_unstable();

        return Err(PestError::new_from_span(
            ErrorVariant::CustomError {
                message: format!(
                    "attribute `{}` is not filterable, available filterable attributes are: {}",
                    key.as_str(),
                    filterable_names.join(", "),
                ),
            },
            key.as_span(),
        ));
    }

    Ok(field_id)
}
/// Attempts to convert the text of the given pest pair into a `T`.
///
/// The first element of the returned tuple is the parsed value, or a custom pest
/// error located on the span of the pair and carrying the message of the failed
/// conversion. The second element is always the raw text of the pair, whether
/// the conversion succeeded or not.
fn pest_parse<T>(pair: Pair<Rule>) -> (Result<T, pest::error::Error<Rule>>, String)
where
    T: FromStr,
    T::Err: ToString,
{
    let raw = pair.as_str();

    // Turn the conversion error into a pest error that points at the faulty text.
    let parsed = raw.parse::<T>().map_err(|err| {
        PestError::<Rule>::new_from_span(
            ErrorVariant::CustomError { message: err.to_string() },
            pair.as_span(),
        )
    });

    (parsed, raw.to_string())
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
@@ -484,24 +493,24 @@ mod tests {
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap(); let index = Index::new(options, &path).unwrap();
// Set the faceted fields to be the channel. // Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashset!{ S("channel") }); builder.set_filterable_fields(hashset!{ S("channel") });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
// Test that the facet condition is correctly generated. // Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap(); let condition = FilterCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap();
let expected = Operator(0, Operator::Equal(None, S("ponce"))); let expected = Operator(0, Operator::Equal(None, S("ponce")));
assert_eq!(condition, expected); assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); let condition = FilterCondition::from_str(&rtxn, &index, "channel != ponce").unwrap();
let expected = Operator(0, Operator::NotEqual(None, S("ponce"))); let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
assert_eq!(condition, expected); assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap();
let expected = Operator(0, Operator::NotEqual(None, S("ponce"))); let expected = Operator(0, Operator::NotEqual(None, S("ponce")));
assert_eq!(condition, expected); assert_eq!(condition, expected);
} }
@@ -513,20 +522,20 @@ mod tests {
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap(); let index = Index::new(options, &path).unwrap();
// Set the faceted fields to be the channel. // Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashset!{ "timestamp".into() }); builder.set_filterable_fields(hashset!{ "timestamp".into() });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
// Test that the facet condition is correctly generated. // Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap();
let expected = Operator(0, Between(22.0, 44.0)); let expected = Operator(0, Between(22.0, 44.0));
assert_eq!(condition, expected); assert_eq!(condition, expected);
let condition = FacetCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap();
let expected = Or( let expected = Or(
Box::new(Operator(0, LowerThan(22.0))), Box::new(Operator(0, LowerThan(22.0))),
Box::new(Operator(0, GreaterThan(44.0))), Box::new(Operator(0, GreaterThan(44.0))),
@@ -541,17 +550,17 @@ mod tests {
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap(); let index = Index::new(options, &path).unwrap();
// Set the faceted fields to be the channel. // Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") }); builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
// Test that the facet condition is correctly generated. // Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_str( let condition = FilterCondition::from_str(
&rtxn, &index, &rtxn, &index,
"channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)",
).unwrap(); ).unwrap();
@@ -564,7 +573,7 @@ mod tests {
); );
assert_eq!(condition, expected); assert_eq!(condition, expected);
let condition = FacetCondition::from_str( let condition = FilterCondition::from_str(
&rtxn, &index, &rtxn, &index,
"channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)",
).unwrap(); ).unwrap();
@@ -588,21 +597,21 @@ mod tests {
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap(); let index = Index::new(options, &path).unwrap();
// Set the faceted fields to be the channel. // Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order
builder.set_faceted_fields(hashset!{ S("channel"), S("timestamp") }); builder.set_filterable_fields(hashset!{ S("channel"), S("timestamp") });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
// Test that the facet condition is correctly generated. // Test that the facet condition is correctly generated.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let condition = FacetCondition::from_array( let condition = FilterCondition::from_array(
&rtxn, &index, &rtxn, &index,
vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])], vec![Either::Right("channel = gotaga"), Either::Left(vec!["timestamp = 44", "channel != ponce"])],
).unwrap().unwrap(); ).unwrap().unwrap();
let expected = FacetCondition::from_str( let expected = FilterCondition::from_str(
&rtxn, &index, &rtxn, &index,
"channel = gotaga AND (timestamp = 44 OR channel != ponce)", "channel = gotaga AND (timestamp = 44 OR channel != ponce)",
).unwrap(); ).unwrap();

View File

@@ -9,10 +9,10 @@ use crate::heed_codec::CboRoaringBitmapCodec;
use crate::heed_codec::facet::FacetLevelValueF64Codec; use crate::heed_codec::facet::FacetLevelValueF64Codec;
use crate::{Index, FieldId}; use crate::{Index, FieldId};
pub use self::facet_condition::{FacetCondition, Operator}; pub use self::filter_condition::{FilterCondition, Operator};
pub use self::facet_distribution::FacetDistribution; pub use self::facet_distribution::FacetDistribution;
mod facet_condition; mod filter_condition;
mod facet_distribution; mod facet_distribution;
mod parser; mod parser;

View File

@@ -12,11 +12,11 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap; use roaring::bitmap::RoaringBitmap;
use distinct::{Distinct, DocIter, FacetDistinct, MapDistinct, NoopDistinct}; use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
use crate::search::criteria::r#final::{Final, FinalResult}; use crate::search::criteria::r#final::{Final, FinalResult};
use crate::{Index, DocumentId}; use crate::{Index, DocumentId};
pub use self::facet::{FacetCondition, FacetDistribution, FacetIter, Operator}; pub use self::facet::{FilterCondition, FacetDistribution, FacetIter, Operator};
pub use self::matching_words::MatchingWords; pub use self::matching_words::MatchingWords;
use self::query_tree::QueryTreeBuilder; use self::query_tree::QueryTreeBuilder;
@@ -33,7 +33,7 @@ mod matching_words;
pub struct Search<'a> { pub struct Search<'a> {
query: Option<String>, query: Option<String>,
facet_condition: Option<FacetCondition>, filter: Option<FilterCondition>,
offset: usize, offset: usize,
limit: usize, limit: usize,
optional_words: bool, optional_words: bool,
@@ -47,7 +47,7 @@ impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> { pub fn new(rtxn: &'a heed::RoTxn, index: &'a Index) -> Search<'a> {
Search { Search {
query: None, query: None,
facet_condition: None, filter: None,
offset: 0, offset: 0,
limit: 20, limit: 20,
optional_words: true, optional_words: true,
@@ -88,8 +88,8 @@ impl<'a> Search<'a> {
self self
} }
pub fn facet_condition(&mut self, condition: FacetCondition) -> &mut Search<'a> { pub fn filter(&mut self, condition: FilterCondition) -> &mut Search<'a> {
self.facet_condition = Some(condition); self.filter = Some(condition);
self self
} }
@@ -121,12 +121,12 @@ impl<'a> Search<'a> {
// We create the original candidates with the facet conditions results. // We create the original candidates with the facet conditions results.
let before = Instant::now(); let before = Instant::now();
let facet_candidates = match &self.facet_condition { let filtered_candidates = match &self.filter {
Some(condition) => Some(condition.evaluate(self.rtxn, self.index)?), Some(condition) => Some(condition.evaluate(self.rtxn, self.index)?),
None => None, None => None,
}; };
debug!("facet candidates: {:?} took {:.02?}", facet_candidates, before.elapsed()); debug!("facet candidates: {:?} took {:.02?}", filtered_candidates, before.elapsed());
let matching_words = match query_tree.as_ref() { let matching_words = match query_tree.as_ref() {
Some(query_tree) => MatchingWords::from_query_tree(&query_tree), Some(query_tree) => MatchingWords::from_query_tree(&query_tree),
@@ -134,31 +134,26 @@ impl<'a> Search<'a> {
}; };
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
let criteria = criteria_builder.build(query_tree, primitive_query, facet_candidates)?; let criteria = criteria_builder.build(query_tree, primitive_query, filtered_candidates)?;
match self.index.distinct_attribute(self.rtxn)? { match self.index.distinct_field(self.rtxn)? {
None => self.perform_sort(NoopDistinct, matching_words, criteria), None => self.perform_sort(NoopDistinct, matching_words, criteria),
Some(name) => { Some(name) => {
let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
let id = field_ids_map.id(name).expect("distinct not present in field map"); let id = field_ids_map.id(name).expect("distinct not present in field map");
let faceted_fields = self.index.faceted_fields(self.rtxn)?; let distinct = FacetDistinct::new(id, self.index, self.rtxn);
if faceted_fields.contains(name) { self.perform_sort(distinct, matching_words, criteria)
let distinct = FacetDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
} else {
let distinct = MapDistinct::new(id, self.index, self.rtxn);
self.perform_sort(distinct, matching_words, criteria)
}
} }
} }
} }
fn perform_sort( fn perform_sort<D: Distinct>(
&self, &self,
mut distinct: impl for<'c> Distinct<'c>, mut distinct: D,
matching_words: MatchingWords, matching_words: MatchingWords,
mut criteria: Final, mut criteria: Final,
) -> anyhow::Result<SearchResult> { ) -> anyhow::Result<SearchResult>
{
let mut offset = self.offset; let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new(); let mut initial_candidates = RoaringBitmap::new();
let mut excluded_candidates = RoaringBitmap::new(); let mut excluded_candidates = RoaringBitmap::new();
@@ -193,7 +188,7 @@ impl fmt::Debug for Search<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let Search { let Search {
query, query,
facet_condition, filter,
offset, offset,
limit, limit,
optional_words, optional_words,
@@ -204,7 +199,7 @@ impl fmt::Debug for Search<'_> {
} = self; } = self;
f.debug_struct("Search") f.debug_struct("Search")
.field("query", query) .field("query", query)
.field("facet_condition", facet_condition) .field("filter", filter)
.field("offset", offset) .field("offset", offset)
.field("limit", limit) .field("limit", limit)
.field("optional_words", optional_words) .field("optional_words", optional_words)

View File

@@ -9,7 +9,6 @@ use rayon::ThreadPool;
use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde::{Deserialize, Deserializer, Serialize, Serializer};
use crate::{FieldsIdsMap, Index}; use crate::{FieldsIdsMap, Index};
use crate::criterion::Criterion;
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep}; use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
use crate::update::index_documents::{IndexDocumentsMethod, Transform}; use crate::update::index_documents::{IndexDocumentsMethod, Transform};
@@ -66,10 +65,10 @@ pub struct Settings<'a, 't, 'u, 'i> {
searchable_fields: Setting<Vec<String>>, searchable_fields: Setting<Vec<String>>,
displayed_fields: Setting<Vec<String>>, displayed_fields: Setting<Vec<String>>,
faceted_fields: Setting<HashSet<String>>, filterable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<String>>, criteria: Setting<Vec<String>>,
stop_words: Setting<BTreeSet<String>>, stop_words: Setting<BTreeSet<String>>,
distinct_attribute: Setting<String>, distinct_field: Setting<String>,
synonyms: Setting<HashMap<String, Vec<String>>>, synonyms: Setting<HashMap<String, Vec<String>>>,
} }
@@ -92,10 +91,10 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
thread_pool: None, thread_pool: None,
searchable_fields: Setting::NotSet, searchable_fields: Setting::NotSet,
displayed_fields: Setting::NotSet, displayed_fields: Setting::NotSet,
faceted_fields: Setting::NotSet, filterable_fields: Setting::NotSet,
criteria: Setting::NotSet, criteria: Setting::NotSet,
stop_words: Setting::NotSet, stop_words: Setting::NotSet,
distinct_attribute: Setting::NotSet, distinct_field: Setting::NotSet,
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
update_id, update_id,
} }
@@ -117,12 +116,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.displayed_fields = Setting::Set(names); self.displayed_fields = Setting::Set(names);
} }
pub fn reset_faceted_fields(&mut self) { pub fn reset_filterable_fields(&mut self) {
self.faceted_fields = Setting::Reset; self.filterable_fields = Setting::Reset;
} }
pub fn set_faceted_fields(&mut self, names_facet_types: HashSet<String>) { pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
self.faceted_fields = Setting::Set(names_facet_types); self.filterable_fields = Setting::Set(names);
} }
pub fn reset_criteria(&mut self) { pub fn reset_criteria(&mut self) {
@@ -145,12 +144,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
} }
pub fn reset_distinct_attribute(&mut self) { pub fn reset_distinct_field(&mut self) {
self.distinct_attribute = Setting::Reset; self.distinct_field = Setting::Reset;
} }
pub fn set_distinct_attribute(&mut self, distinct_attribute: String) { pub fn set_distinct_field(&mut self, distinct_field: String) {
self.distinct_attribute = Setting::Set(distinct_attribute); self.distinct_field = Setting::Set(distinct_field);
} }
pub fn reset_synonyms(&mut self) { pub fn reset_synonyms(&mut self) {
@@ -166,8 +165,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()> fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> anyhow::Result<()>
where where
F: Fn(UpdateIndexingStep, u64) + Sync F: Fn(UpdateIndexingStep, u64) + Sync
{ {
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let update_id = self.update_id; let update_id = self.update_id;
@@ -198,7 +197,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let output = transform.remap_index_documents( let output = transform.remap_index_documents(
primary_key.to_string(), primary_key.to_string(),
old_fields_ids_map, old_fields_ids_map,
fields_ids_map.clone())?; fields_ids_map.clone(),
)?;
// We clear the full database (words-fst, documents ids and documents content). // We clear the full database (words-fst, documents ids and documents content).
ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?; ClearDocuments::new(self.wtxn, self.index, self.update_id).execute()?;
@@ -215,6 +215,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size; indexing_builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
indexing_builder.thread_pool = self.thread_pool; indexing_builder.thread_pool = self.thread_pool;
indexing_builder.execute_raw(output, &cb)?; indexing_builder.execute_raw(output, &cb)?;
Ok(()) Ok(())
} }
@@ -243,18 +244,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(true) Ok(true)
} }
fn update_distinct_attribute(&mut self) -> anyhow::Result<bool> { fn update_distinct_field(&mut self) -> anyhow::Result<bool> {
match self.distinct_attribute { match self.distinct_field {
Setting::Set(ref attr) => { Setting::Set(ref attr) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
fields_ids_map fields_ids_map
.insert(attr) .insert(attr)
.context("field id limit exceeded")?; .context("field id limit exceeded")?;
self.index.put_distinct_attribute(self.wtxn, &attr)?; self.index.put_distinct_field(self.wtxn, &attr)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
} }
Setting::Reset => { self.index.delete_distinct_attribute(self.wtxn)?; }, Setting::Reset => { self.index.delete_distinct_field(self.wtxn)?; },
Setting::NotSet => return Ok(false), Setting::NotSet => return Ok(false),
} }
Ok(true) Ok(true)
@@ -267,7 +268,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
// every time the searchable attributes are updated, we need to update the // every time the searchable attributes are updated, we need to update the
// ids for any settings that uses the facets. (displayed_fields, // ids for any settings that uses the facets. (displayed_fields,
// faceted_fields) // filterable_fields)
let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let old_fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_fields_ids_map = FieldsIdsMap::new(); let mut new_fields_ids_map = FieldsIdsMap::new();
@@ -381,8 +382,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
} }
} }
fn update_facets(&mut self) -> anyhow::Result<bool> { fn update_filterable(&mut self) -> anyhow::Result<()> {
match self.faceted_fields { match self.filterable_fields {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let mut new_facets = HashSet::new(); let mut new_facets = HashSet::new();
@@ -390,22 +391,21 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fields_ids_map.insert(name).context("field id limit exceeded")?; fields_ids_map.insert(name).context("field id limit exceeded")?;
new_facets.insert(name.clone()); new_facets.insert(name.clone());
} }
self.index.put_faceted_fields(self.wtxn, &new_facets)?; self.index.put_filterable_fields(self.wtxn, &new_facets)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
} }
Setting::Reset => { self.index.delete_faceted_fields(self.wtxn)?; } Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; }
Setting::NotSet => return Ok(false) Setting::NotSet => (),
} }
Ok(true) Ok(())
} }
fn update_criteria(&mut self) -> anyhow::Result<()> { fn update_criteria(&mut self) -> anyhow::Result<()> {
match self.criteria { match self.criteria {
Setting::Set(ref fields) => { Setting::Set(ref fields) => {
let faceted_fields = self.index.faceted_fields(&self.wtxn)?;
let mut new_criteria = Vec::new(); let mut new_criteria = Vec::new();
for name in fields { for name in fields {
let criterion = Criterion::from_str(&faceted_fields, &name)?; let criterion = name.parse()?;
new_criteria.push(criterion); new_criteria.push(criterion);
} }
self.index.put_criteria(self.wtxn, &new_criteria)?; self.index.put_criteria(self.wtxn, &new_criteria)?;
@@ -421,20 +421,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
F: Fn(UpdateIndexingStep, u64) + Sync F: Fn(UpdateIndexingStep, u64) + Sync
{ {
self.index.set_updated_at(self.wtxn, &Utc::now())?; self.index.set_updated_at(self.wtxn, &Utc::now())?;
let old_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?; let old_fields_ids_map = self.index.fields_ids_map(&self.wtxn)?;
self.update_displayed()?; self.update_displayed()?;
let stop_words_updated = self.update_stop_words()?; self.update_filterable()?;
let facets_updated = self.update_facets()?; self.update_distinct_field()?;
self.update_distinct_attribute()?;
// update_criteria MUST be called after update_facets, since criterion fields must be set
// as facets.
self.update_criteria()?; self.update_criteria()?;
// If the set of faceted fields changed we indicate that we must reindex as we must
// index new fields as facets. It means that the distinct attribute,
// an Asc/Desc criterion or a filterable attribute has been added or removed.
let new_faceted_fields = self.index.faceted_fields(&self.wtxn)?;
let faceted_updated = old_faceted_fields != new_faceted_fields;
let stop_words_updated = self.update_stop_words()?;
let synonyms_updated = self.update_synonyms()?; let synonyms_updated = self.update_synonyms()?;
let searchable_updated = self.update_searchable()?; let searchable_updated = self.update_searchable()?;
if stop_words_updated || facets_updated || synonyms_updated || searchable_updated { if stop_words_updated || faceted_updated || synonyms_updated || searchable_updated {
self.reindex(&progress_callback, old_fields_ids_map)?; self.reindex(&progress_callback, old_fields_ids_map)?;
} }
Ok(()) Ok(())
} }
} }
@@ -446,6 +455,7 @@ mod tests {
use maplit::{btreeset, hashmap, hashset}; use maplit::{btreeset, hashmap, hashset};
use big_s::S; use big_s::S;
use crate::{Criterion, FilterCondition, SearchResult};
use crate::update::{IndexDocuments, UpdateFormat}; use crate::update::{IndexDocuments, UpdateFormat};
use super::*; use super::*;
@@ -611,16 +621,16 @@ mod tests {
} }
#[test] #[test]
fn set_faceted_fields() { fn set_filterable_fields() {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap(); let index = Index::new(options, &path).unwrap();
// Set the faceted fields to be the age. // Set the filterable fields to be the age.
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_faceted_fields(hashset!{ S("age") }); builder.set_filterable_fields(hashset!{ S("age") });
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
// Then index some documents. // Then index some documents.
@@ -637,7 +647,7 @@ mod tests {
// Check that the displayed fields are correctly set. // Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let fields_ids = index.faceted_fields(&rtxn).unwrap(); let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset!{ S("age") }); assert_eq!(fields_ids, hashset!{ S("age") });
// Only count the field_id 0 and level 0 facet values. // Only count the field_id 0 and level 0 facet values.
// TODO we must support typed CSVs for numbers to be understood. // TODO we must support typed CSVs for numbers to be understood.
@@ -670,6 +680,88 @@ mod tests {
assert_eq!(count, 4); assert_eq!(count, 4);
} }
/// Checks that setting an `asc(age)` criterion makes a search without any query
/// return the documents ordered by ascending `age`.
#[test]
fn set_asc_desc_field() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// Set the criteria so that the documents are sorted by ascending age.
// Note that `age` is never declared as a filterable field here.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0);
// Don't display the generated `id` field.
builder.set_displayed_fields(vec![S("name"), S("age")]);
builder.set_criteria(vec![S("asc(age)")]);
builder.execute(|_, _| ()).unwrap();
// Then index some documents, in the same write transaction as the settings.
let content = &br#"[
{ "name": "kevin", "age": 23 },
{ "name": "kevina", "age": 21 },
{ "name": "benoit", "age": 34 }
]"#[..];
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
builder.update_format(UpdateFormat::Json);
builder.enable_autogenerate_docids();
builder.execute(content, |_, _| ()).unwrap();
wtxn.commit().unwrap();
// Run an empty query just to ensure that the search results are ordered.
let rtxn = index.read_txn().unwrap();
let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap();
let documents = index.documents(&rtxn, documents_ids).unwrap();
// Fetch the documents "age" field in the order in which the documents appear.
let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap();
let iter = documents.into_iter().map(|(_, doc)| {
// The stored value is read as UTF-8 text and parsed as an integer.
let bytes = doc.get(age_field_id).unwrap();
let string = std::str::from_utf8(bytes).unwrap();
string.parse::<u32>().unwrap()
});
// The ages must come out in ascending order, not in insertion order (23, 21, 34).
assert_eq!(iter.collect::<Vec<_>>(), vec![21, 23, 34]);
}
#[test]
fn set_distinct_field() {
    // Declaring `age` as the distinct field must make a placeholder search
    // return a single document per distinct age value.
    let tmp_dir = tempfile::tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(env_options, &tmp_dir).unwrap();

    // Register the settings: the age is the distinct field.
    let mut wtxn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut wtxn, &index, 0);
    // Don't display the generated `id` field.
    settings.set_displayed_fields(vec![S("name"), S("age")]);
    settings.set_distinct_field(S("age"));
    settings.execute(|_, _| ()).unwrap();

    // Index seven documents sharing only three different ages (23, 21 and 34).
    let payload = &br#"[
{ "name": "kevin", "age": 23 },
{ "name": "kevina", "age": 21 },
{ "name": "benoit", "age": 34 },
{ "name": "bernard", "age": 34 },
{ "name": "bertrand", "age": 34 },
{ "name": "bernie", "age": 34 },
{ "name": "ben", "age": 34 }
]"#[..];
    let mut indexing = IndexDocuments::new(&mut wtxn, &index, 1);
    indexing.update_format(UpdateFormat::Json);
    indexing.enable_autogenerate_docids();
    indexing.execute(payload, |_, _| ()).unwrap();
    wtxn.commit().unwrap();

    // Run an empty query so that only the distinct rule shapes the result set.
    let rtxn = index.read_txn().unwrap();
    let found = index.search(&rtxn).execute().unwrap();

    // One document per age: the five documents aged 34 collapse into one.
    assert_eq!(found.documents_ids.len(), 3);
}
#[test] #[test]
fn default_stop_words() { fn default_stop_words() {
let path = tempfile::tempdir().unwrap(); let path = tempfile::tempdir().unwrap();
@@ -833,7 +925,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index, 0); let mut builder = Settings::new(&mut wtxn, &index, 0);
builder.set_displayed_fields(vec!["hello".to_string()]); builder.set_displayed_fields(vec!["hello".to_string()]);
builder.set_faceted_fields(hashset!{ S("age"), S("toto") }); builder.set_filterable_fields(hashset!{ S("age"), S("toto") });
builder.set_criteria(vec!["asc(toto)".to_string()]); builder.set_criteria(vec!["asc(toto)".to_string()]);
builder.execute(|_, _| ()).unwrap(); builder.execute(|_, _| ()).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@@ -858,4 +950,24 @@ mod tests {
assert!(index.primary_key(&rtxn).unwrap().is_none()); assert!(index.primary_key(&rtxn).unwrap().is_none());
assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap());
} }
#[test]
fn setting_not_filterable_cant_filter() {
    // A field that is only used by an Asc/Desc criterion must not become
    // usable in filters: building a filter on it is expected to fail.
    let tmp_dir = tempfile::tempdir().unwrap();
    let mut env_options = EnvOpenOptions::new();
    env_options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(env_options, &tmp_dir).unwrap();

    // Set the displayed fields and the criteria, but no filterable fields.
    let mut wtxn = index.write_txn().unwrap();
    let mut settings = Settings::new(&mut wtxn, &index, 0);
    settings.set_displayed_fields(vec!["hello".to_string()]);
    // It is only Asc(toto), there is a facet database but it is denied to filter with toto.
    settings.set_criteria(vec!["asc(toto)".to_string()]);
    settings.execute(|_, _| ()).unwrap();
    wtxn.commit().unwrap();

    // Parsing a filter on `toto` must return an error; `unwrap_err` panics otherwise.
    let rtxn = index.read_txn().unwrap();
    let filter = FilterCondition::from_str(&rtxn, &index, "toto = 32");
    filter.unwrap_err();
}
} }