Merge branch 'search-refactor-typo-attributes' into search-refactor

This commit is contained in:
Loïc Lecrenier
2023-04-12 16:47:31 +02:00
10 changed files with 254 additions and 95 deletions

View File

@ -3,18 +3,18 @@ mod ntypo_subset;
mod parse_query;
mod phrase;
use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext};
use crate::Result;
use std::collections::BTreeSet;
use std::ops::RangeInclusive;
use compute_derivations::partially_initialized_term_from_word;
use either::Either;
pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{located_query_terms_from_string, make_ngram, number_of_typos_allowed};
pub use phrase::Phrase;
use compute_derivations::partially_initialized_term_from_word;
use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext, Word};
use crate::Result;
/// A set of word derivations attached to a location in the search query.
#[derive(Clone, PartialEq, Eq, Hash)]
@ -159,12 +159,12 @@ impl QueryTermSubset {
self.two_typo_subset.intersect(&other.two_typo_subset);
}
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Interned<String>> {
pub fn use_prefix_db(&self, ctx: &SearchContext) -> Option<Word> {
let original = ctx.term_interner.get(self.original);
let Some(use_prefix_db) = original.zero_typo.use_prefix_db else {
return None
};
match &self.zero_typo_subset {
let word = match &self.zero_typo_subset {
NTypoTermSubset::All => Some(use_prefix_db),
NTypoTermSubset::Subset { words, phrases: _ } => {
// TODO: use a subset of prefix words instead
@ -175,12 +175,19 @@ impl QueryTermSubset {
}
}
NTypoTermSubset::Nothing => None,
}
};
word.map(|word| {
if original.ngram_words.is_some() {
Word::Derived(word)
} else {
Word::Original(word)
}
})
}
pub fn all_single_words_except_prefix_db(
&self,
ctx: &mut SearchContext,
) -> Result<BTreeSet<Interned<String>>> {
) -> Result<BTreeSet<Word>> {
let mut result = BTreeSet::default();
// TODO: a compute_partially funtion
if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
@ -197,8 +204,20 @@ impl QueryTermSubset {
synonyms: _,
use_prefix_db: _,
} = &original.zero_typo;
result.extend(zero_typo.iter().copied());
result.extend(prefix_of.iter().copied());
result.extend(zero_typo.iter().copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
result.extend(prefix_of.iter().copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
}
NTypoTermSubset::Subset { words, phrases: _ } => {
let ZeroTypoTerm {
@ -210,10 +229,20 @@ impl QueryTermSubset {
} = &original.zero_typo;
if let Some(zero_typo) = zero_typo {
if words.contains(zero_typo) {
result.insert(*zero_typo);
if original.ngram_words.is_some() {
result.insert(Word::Derived(*zero_typo));
} else {
result.insert(Word::Original(*zero_typo));
}
}
}
result.extend(prefix_of.intersection(words).copied());
result.extend(prefix_of.intersection(words).copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
}
NTypoTermSubset::Nothing => {}
}
@ -223,13 +252,13 @@ impl QueryTermSubset {
let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
panic!()
};
result.extend(one_typo.iter().copied())
result.extend(one_typo.iter().copied().map(Word::Derived))
}
NTypoTermSubset::Subset { words, phrases: _ } => {
let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
panic!()
};
result.extend(one_typo.intersection(words));
result.extend(one_typo.intersection(words).copied().map(Word::Derived));
}
NTypoTermSubset::Nothing => {}
};
@ -239,13 +268,13 @@ impl QueryTermSubset {
let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
panic!()
};
result.extend(two_typos.iter().copied());
result.extend(two_typos.iter().copied().map(Word::Derived));
}
NTypoTermSubset::Subset { words, phrases: _ } => {
let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
panic!()
};
result.extend(two_typos.intersection(words));
result.extend(two_typos.intersection(words).copied().map(Word::Derived));
}
NTypoTermSubset::Nothing => {}
};