Merge remote-tracking branch 'origin/main' into facet-levels-refactor

This commit is contained in:
Loïc Lecrenier
2022-10-26 15:13:34 +02:00
35 changed files with 132 additions and 149 deletions

View File

@ -120,7 +120,7 @@ impl<'t> Criterion for AscDesc<'t> {
let mut candidates = match (&self.query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let context = CriteriaBuilder::new(self.rtxn, self.index)?;
resolve_query_tree(&context, qt, params.wdcache)?
}
(None, None) => self.index.documents_ids(self.rtxn)?,

View File

@ -89,7 +89,7 @@ impl<'t> Criterion for Attribute<'t> {
}
}
} else {
let mut set_buckets = match self.set_buckets.as_mut() {
let set_buckets = match self.set_buckets.as_mut() {
Some(set_buckets) => set_buckets,
None => {
let new_buckets = initialize_set_buckets(
@ -102,7 +102,7 @@ impl<'t> Criterion for Attribute<'t> {
}
};
match set_compute_candidates(&mut set_buckets, &allowed_candidates)? {
match set_compute_candidates(set_buckets, &allowed_candidates)? {
Some((_score, candidates)) => candidates,
None => {
return Ok(Some(CriterionResult {
@ -199,18 +199,18 @@ impl<'t> QueryPositionIterator<'t> {
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
inner.push(iter.peekable());
} else {
for (word, _) in word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?
for (word, _) in word_derivations(word, true, 0, ctx.words_fst(), wdcache)?
{
let iter = ctx.word_position_iterator(&word, in_prefix_cache)?;
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
inner.push(iter.peekable());
}
}
}
QueryKind::Tolerant { typo, word } => {
for (word, _) in
word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?
word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?
{
let iter = ctx.word_position_iterator(&word, in_prefix_cache)?;
let iter = ctx.word_position_iterator(word, in_prefix_cache)?;
inner.push(iter.peekable());
}
}
@ -476,8 +476,7 @@ fn initialize_linear_buckets(
} else {
words_positions
.get(word)
.map(|positions| positions.iter().next())
.flatten()
.and_then(|positions| positions.iter().next())
}
}
QueryKind::Tolerant { typo, word } => {
@ -574,7 +573,7 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
if ops.iter().all(|op| op.query().is_some()) {
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
} else {
ops.iter().map(recurse).flatten().collect()
ops.iter().flat_map(recurse).collect()
}
}
Phrase(words) => {

View File

@ -90,7 +90,7 @@ impl Criterion for Geo<'_> {
let mut candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let context = CriteriaBuilder::new(self.rtxn, self.index)?;
resolve_query_tree(&context, qt, params.wdcache)?
}
(None, None) => self.index.documents_ids(self.rtxn)?,

View File

@ -44,7 +44,7 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
let mut candidates = resolve_query_tree(
self.ctx,
answer.query_tree.as_ref().unwrap(),
&mut params.wdcache,
params.wdcache,
)?;
// Apply the filters on the documents retrieved with the query tree.

View File

@ -186,19 +186,19 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
}
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index.word_docids.get(self.rtxn, &word)
self.index.word_docids.get(self.rtxn, word)
}
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index.exact_word_docids.get(self.rtxn, &word)
self.index.exact_word_docids.get(self.rtxn, word)
}
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index.word_prefix_docids.get(self.rtxn, &word)
self.index.word_prefix_docids.get(self.rtxn, word)
}
fn exact_word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index.exact_word_prefix_docids.get(self.rtxn, &word)
self.index.exact_word_prefix_docids.get(self.rtxn, word)
}
fn word_pair_proximity_docids(
@ -321,7 +321,7 @@ impl<'t> CriteriaBuilder<'t> {
exhaustive_number_hits,
distinct,
)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? {
for name in self.index.criteria(self.rtxn)? {
criterion = match name {
Name::Words => Box::new(Words::new(self, criterion)),
Name::Typo => Box::new(Typo::new(self, criterion)),
@ -330,29 +330,23 @@ impl<'t> CriteriaBuilder<'t> {
for asc_desc in sort_criteria {
criterion = match asc_desc {
AscDescName::Asc(Member::Field(field)) => Box::new(AscDesc::asc(
&self.index,
&self.rtxn,
self.index,
self.rtxn,
criterion,
field.to_string(),
)?),
AscDescName::Desc(Member::Field(field)) => Box::new(AscDesc::desc(
&self.index,
&self.rtxn,
self.index,
self.rtxn,
criterion,
field.to_string(),
)?),
AscDescName::Asc(Member::Geo(point)) => Box::new(Geo::asc(
&self.index,
&self.rtxn,
criterion,
point.clone(),
)?),
AscDescName::Desc(Member::Geo(point)) => Box::new(Geo::desc(
&self.index,
&self.rtxn,
criterion,
point.clone(),
)?),
AscDescName::Asc(Member::Geo(point)) => {
Box::new(Geo::asc(self.index, self.rtxn, criterion, *point)?)
}
AscDescName::Desc(Member::Geo(point)) => {
Box::new(Geo::desc(self.index, self.rtxn, criterion, *point)?)
}
};
}
criterion
@ -363,10 +357,10 @@ impl<'t> CriteriaBuilder<'t> {
Name::Attribute => Box::new(Attribute::new(self, criterion)),
Name::Exactness => Box::new(Exactness::new(self, criterion, &primitive_query)?),
Name::Asc(field) => {
Box::new(AscDesc::asc(&self.index, &self.rtxn, criterion, field)?)
Box::new(AscDesc::asc(self.index, self.rtxn, criterion, field)?)
}
Name::Desc(field) => {
Box::new(AscDesc::desc(&self.index, &self.rtxn, criterion, field)?)
Box::new(AscDesc::desc(self.index, self.rtxn, criterion, field)?)
}
};
}
@ -408,7 +402,7 @@ pub fn resolve_query_tree(
}
Ok(candidates)
}
Phrase(words) => resolve_phrase(ctx, &words),
Phrase(words) => resolve_phrase(ctx, words),
Or(_, ops) => {
let mut candidates = RoaringBitmap::new();
for op in ops {
@ -457,7 +451,7 @@ pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBit
}
// We sort the bitmaps so that we perform the small intersections first, which is faster.
bitmaps.sort_unstable_by(|a, b| a.len().cmp(&b.len()));
bitmaps.sort_unstable_by_key(|a| a.len());
for bitmap in bitmaps {
if first_iter {
@ -500,40 +494,40 @@ fn query_docids(
) -> Result<RoaringBitmap> {
match &query.kind {
QueryKind::Exact { word, original_typo } => {
if query.prefix && ctx.in_prefix_cache(&word) {
let mut docids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
if query.prefix && ctx.in_prefix_cache(word) {
let mut docids = ctx.word_prefix_docids(word)?.unwrap_or_default();
// only add the exact docids if the word hasn't been derived
if *original_typo == 0 {
docids |= ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
docids |= ctx.exact_word_prefix_docids(word)?.unwrap_or_default();
}
Ok(docids)
} else if query.prefix {
let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?;
let words = word_derivations(word, true, 0, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new();
for (word, _typo) in words {
docids |= ctx.word_docids(&word)?.unwrap_or_default();
docids |= ctx.word_docids(word)?.unwrap_or_default();
// only add the exact docids if the word hasn't been derived
if *original_typo == 0 {
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
docids |= ctx.exact_word_docids(word)?.unwrap_or_default();
}
}
Ok(docids)
} else {
let mut docids = ctx.word_docids(&word)?.unwrap_or_default();
let mut docids = ctx.word_docids(word)?.unwrap_or_default();
// only add the exact docids if the word hasn't been derived
if *original_typo == 0 {
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
docids |= ctx.exact_word_docids(word)?.unwrap_or_default();
}
Ok(docids)
}
}
QueryKind::Tolerant { typo, word } => {
let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
let words = word_derivations(word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new();
for (word, typo) in words {
let mut current_docids = ctx.word_docids(&word)?.unwrap_or_default();
let mut current_docids = ctx.word_docids(word)?.unwrap_or_default();
if *typo == 0 {
current_docids |= ctx.exact_word_docids(&word)?.unwrap_or_default()
current_docids |= ctx.exact_word_docids(word)?.unwrap_or_default()
}
docids |= current_docids;
}
@ -568,11 +562,11 @@ fn query_pair_proximity_docids(
)? {
Some(docids) => Ok(docids),
None => {
let r_words = word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
let r_words = word_derivations(right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_overall_proximity_docids(
ctx,
&[(left, 0)],
&r_words,
r_words,
proximity,
)
}
@ -585,7 +579,7 @@ fn query_pair_proximity_docids(
}
(QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => {
let l_words =
word_derivations(&left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
word_derivations(left, false, *typo, ctx.words_fst(), wdcache)?.to_owned();
if prefix {
let mut docids = RoaringBitmap::new();
for (left, _) in l_words {
@ -598,11 +592,11 @@ fn query_pair_proximity_docids(
Some(docids) => Ok(docids),
None => {
let r_words =
word_derivations(&right, true, 0, ctx.words_fst(), wdcache)?;
word_derivations(right, true, 0, ctx.words_fst(), wdcache)?;
all_word_pair_overall_proximity_docids(
ctx,
&[(left, 0)],
&r_words,
r_words,
proximity,
)
}
@ -615,17 +609,17 @@ fn query_pair_proximity_docids(
}
}
(QueryKind::Exact { word: left, .. }, QueryKind::Tolerant { typo, word: right }) => {
let r_words = word_derivations(&right, prefix, *typo, ctx.words_fst(), wdcache)?;
all_word_pair_overall_proximity_docids(ctx, &[(left, 0)], &r_words, proximity)
let r_words = word_derivations(right, prefix, *typo, ctx.words_fst(), wdcache)?;
all_word_pair_overall_proximity_docids(ctx, &[(left, 0)], r_words, proximity)
}
(
QueryKind::Tolerant { typo: l_typo, word: left },
QueryKind::Tolerant { typo: r_typo, word: right },
) => {
let l_words =
word_derivations(&left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned();
let r_words = word_derivations(&right, prefix, *r_typo, ctx.words_fst(), wdcache)?;
all_word_pair_overall_proximity_docids(ctx, &l_words, &r_words, proximity)
word_derivations(left, false, *l_typo, ctx.words_fst(), wdcache)?.to_owned();
let r_words = word_derivations(right, prefix, *r_typo, ctx.words_fst(), wdcache)?;
all_word_pair_overall_proximity_docids(ctx, &l_words, r_words, proximity)
}
}
}

View File

@ -99,7 +99,7 @@ impl<'t> Criterion for Proximity<'t> {
// use set theory based algorithm
resolve_candidates(
self.ctx,
&query_tree,
query_tree,
self.proximity,
&mut self.candidates_cache,
params.wdcache,
@ -194,7 +194,7 @@ fn resolve_candidates<'t>(
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
match (most_left, most_right) {
(Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, &words)?)],
(Some(l), Some(r)) => vec![(l, r, resolve_phrase(ctx, words)?)],
_otherwise => Default::default(),
}
} else {
@ -496,7 +496,7 @@ fn resolve_plane_sweep_candidates(
match kind {
QueryKind::Exact { word, .. } => {
if *prefix {
let iter = word_derivations(word, true, 0, &words_positions)
let iter = word_derivations(word, true, 0, words_positions)
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
result.extend(iter);
} else if let Some(positions) = words_positions.get(word) {
@ -504,7 +504,7 @@ fn resolve_plane_sweep_candidates(
}
}
QueryKind::Tolerant { typo, word } => {
let iter = word_derivations(word, *prefix, *typo, &words_positions)
let iter = word_derivations(word, *prefix, *typo, words_positions)
.flat_map(|positions| positions.iter().map(|p| (p, 0, p)));
result.extend(iter);
}

View File

@ -69,7 +69,7 @@ impl<'t> Criterion for Typo<'t> {
let fst = self.ctx.words_fst();
let new_query_tree = match self.typos {
typos if typos < MAX_TYPOS_PER_WORD => alterate_query_tree(
&fst,
fst,
query_tree.clone(),
self.typos,
params.wdcache,
@ -78,7 +78,7 @@ impl<'t> Criterion for Typo<'t> {
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
// we keep the altered query tree
*query_tree = alterate_query_tree(
&fst,
fst,
query_tree.clone(),
self.typos,
params.wdcache,
@ -199,7 +199,7 @@ fn alterate_query_tree(
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
}
// Because Phrases don't allow typos, no alteration can be done.
Phrase(_words) => return Ok(()),
Phrase(_words) => Ok(()),
Operation::Query(q) => {
if let QueryKind::Tolerant { typo, word } = &q.kind {
// if no typo is allowed we don't call word_derivations function,

View File

@ -53,10 +53,7 @@ impl<'t> Criterion for Words<'t> {
None => None,
};
let bucket_candidates = match self.bucket_candidates.as_mut() {
Some(bucket_candidates) => Some(take(bucket_candidates)),
None => None,
};
let bucket_candidates = self.bucket_candidates.as_mut().map(take);
return Ok(Some(CriterionResult {
query_tree: Some(query_tree),

View File

@ -69,7 +69,7 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
match facet_type {
FacetType::Number => {
let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let distribution_prelength = distribution.len();
let db = self.index.field_id_docid_facet_f64s;
@ -94,7 +94,7 @@ impl<'a> FacetDistribution<'a> {
}
FacetType::String => {
let mut normalized_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_strings;
'outer: for docid in candidates.into_iter() {

View File

@ -95,7 +95,7 @@ impl<'a> Filter<'a> {
Either::Left(array) => {
let mut ors = vec![];
for rule in array {
if let Some(filter) = Self::from_str(rule.as_ref())? {
if let Some(filter) = Self::from_str(rule)? {
ors.push(filter.condition);
}
}
@ -107,7 +107,7 @@ impl<'a> Filter<'a> {
}
}
Either::Right(rule) => {
if let Some(filter) = Self::from_str(rule.as_ref())? {
if let Some(filter) = Self::from_str(rule)? {
ands.push(filter.condition);
}
}
@ -300,7 +300,7 @@ impl<'a> Filter<'a> {
index,
filterable_fields,
)?;
return Ok(all_ids - selected);
Ok(all_ids - selected)
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
@ -319,38 +319,36 @@ impl<'a> Filter<'a> {
Ok(RoaringBitmap::new())
}
} else {
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?;
}))?
}
}
FilterCondition::Condition { fid, op } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, &op)
Self::evaluate_operator(rtxn, index, fid, op)
} else {
return Ok(RoaringBitmap::new());
Ok(RoaringBitmap::new())
}
} else {
match fid.lexeme() {
attribute @ "_geo" => {
return Err(fid.as_external_error(FilterError::BadGeo(attribute)))?;
Err(fid.as_external_error(FilterError::BadGeo(attribute)))?
}
attribute if attribute.starts_with("_geoPoint(") => {
return Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?;
Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?
}
attribute @ "_geoDistance" => {
return Err(fid.as_external_error(FilterError::Reserved(attribute)))?;
Err(fid.as_external_error(FilterError::Reserved(attribute)))?
}
attribute => {
return Err(fid.as_external_error(
FilterError::AttributeNotFilterable {
attribute,
filterable_fields: filterable_fields.clone(),
},
))?;
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields: filterable_fields.clone(),
}))?
}
}
}
@ -419,10 +417,10 @@ impl<'a> Filter<'a> {
Ok(result)
} else {
return Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: "_geo",
filterable_fields: filterable_fields.clone(),
}))?;
}))?
}
}
}

View File

@ -44,7 +44,7 @@ impl<'a> Iterator for MatchesIter<'a, '_> {
fn next(&mut self) -> Option<Self::Item> {
match self.inner.next() {
Some((matching_words, ids)) => match matching_words[0].match_token(&self.token) {
Some((matching_words, ids)) => match matching_words[0].match_token(self.token) {
Some(char_len) => {
if matching_words.len() > 1 {
Some(MatchType::Partial(PartialMatch {

View File

@ -49,16 +49,16 @@ impl<'a, A> MatcherBuilder<'a, A> {
pub fn build<'t, 'm>(&'m self, text: &'t str) -> Matcher<'t, 'm, A> {
let crop_marker = match &self.crop_marker {
Some(marker) => marker.as_str(),
None => &DEFAULT_CROP_MARKER,
None => DEFAULT_CROP_MARKER,
};
let highlight_prefix = match &self.highlight_prefix {
Some(marker) => marker.as_str(),
None => &DEFAULT_HIGHLIGHT_PREFIX,
None => DEFAULT_HIGHLIGHT_PREFIX,
};
let highlight_suffix = match &self.highlight_suffix {
Some(marker) => marker.as_str(),
None => &DEFAULT_HIGHLIGHT_SUFFIX,
None => DEFAULT_HIGHLIGHT_SUFFIX,
};
Matcher {
text,
@ -95,7 +95,7 @@ pub struct Match {
token_position: usize,
}
#[derive(Serialize, Debug, Clone, PartialEq)]
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
pub struct MatchBounds {
pub start: usize,
pub length: usize,
@ -131,7 +131,7 @@ impl<'t, A: AsRef<[u8]>> Matcher<'t, '_, A> {
potential_matches.push((token_position, word_position, partial.char_len()));
for (token_position, word_position, word) in words_positions {
partial = match partial.match_token(&word) {
partial = match partial.match_token(word) {
// token matches the partial match, but the match is not full,
// we temporarily save the current token then we try to match the next one.
Some(MatchType::Partial(partial)) => {

View File

@ -188,8 +188,8 @@ impl<'a> Context for QueryTreeBuilder<'a> {
}
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
let one = self.index.min_word_len_one_typo(&self.rtxn)?;
let two = self.index.min_word_len_two_typos(&self.rtxn)?;
let one = self.index.min_word_len_one_typo(self.rtxn)?;
let two = self.index.min_word_len_two_typos(self.rtxn)?;
Ok((one, two))
}
@ -207,7 +207,7 @@ impl<'a> Context for QueryTreeBuilder<'a> {
self.index
.word_pair_proximity_docids
.remap_data_type::<CboRoaringBitmapLenCodec>()
.get(&self.rtxn, &key)
.get(self.rtxn, &key)
}
}
@ -313,7 +313,7 @@ pub struct TypoConfig<'a> {
/// Return the `QueryKind` of a word depending on `authorize_typos`
/// and the provided word length.
fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind {
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
if authorize_typos && !config.exact_words.map_or(false, |s| s.contains(&word)) {
let count = word.chars().count().min(u8::MAX as usize) as u8;
if count < config.word_len_one_typo {
@ -556,7 +556,7 @@ fn create_matching_words(
for synonym in synonyms {
let synonym = synonym
.into_iter()
.map(|syn| MatchingWord::new(syn.to_string(), 0, false))
.map(|syn| MatchingWord::new(syn, 0, false))
.collect();
matching_words.push((synonym, vec![id]));
}
@ -583,8 +583,7 @@ fn create_matching_words(
PrimitiveQueryPart::Phrase(words) => {
let ids: Vec<_> =
(0..words.len()).into_iter().map(|i| id + i as PrimitiveWordId).collect();
let words =
words.into_iter().map(|w| MatchingWord::new(w.to_string(), 0, false)).collect();
let words = words.into_iter().map(|w| MatchingWord::new(w, 0, false)).collect();
matching_words.push((words, ids));
}
}
@ -639,7 +638,7 @@ fn create_matching_words(
for synonym in synonyms {
let synonym = synonym
.into_iter()
.map(|syn| MatchingWord::new(syn.to_string(), 0, false))
.map(|syn| MatchingWord::new(syn, 0, false))
.collect();
matching_words.push((synonym, ids.clone()));
}