Compute split words derivations of terms that don't accept typos

This commit is contained in:
Loïc Lecrenier
2023-05-10 13:31:19 +02:00
parent 4a4210c116
commit 4d352a21ac
3 changed files with 109 additions and 50 deletions

View File

@ -28,11 +28,9 @@ pub enum ZeroOrOneTypo {
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
if s.max_nbr_typos == 0 {
s.one_typo = Lazy::Init(OneTypoTerm::default());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_nbr_typos == 1 && s.one_typo.is_uninit() {
if s.max_nbr_typos <= 1 && s.one_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
// Initialize one_typo subterm even if max_nbr_typo is 0 because of split words
self.initialize_one_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
assert!(s.one_typo.is_init());
@ -277,7 +275,7 @@ fn find_split_words(ctx: &mut SearchContext, word: &str) -> Result<Option<Intern
impl Interned<QueryTerm> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm { original, is_prefix, one_typo, .. } = self_mut;
let QueryTerm { original, is_prefix, one_typo, max_nbr_typos, .. } = self_mut;
let original = *original;
let is_prefix = *is_prefix;
// let original_str = ctx.word_interner.get(*original).to_owned();
@ -286,19 +284,22 @@ impl Interned<QueryTerm> {
}
let mut one_typo_words = BTreeSet::new();
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
if *max_nbr_typos > 0 {
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
}
}
Ok(ControlFlow::Continue(()))
})?;
Ok(ControlFlow::Continue(()))
})?;
}
let original_str = ctx.word_interner.get(original).to_owned();
let split_words = find_split_words(ctx, original_str.as_str())?;
@ -327,7 +328,7 @@ impl Interned<QueryTerm> {
}
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm { original, is_prefix, two_typo, .. } = self_mut;
let QueryTerm { original, is_prefix, two_typo, max_nbr_typos, .. } = self_mut;
let original_str = ctx.word_interner.get(*original).to_owned();
if two_typo.is_init() {
return Ok(());
@ -335,34 +336,37 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new();
let mut two_typo_words = BTreeSet::new();
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
if *max_nbr_typos > 0 {
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
}
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
}
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
}
}
Ok(ControlFlow::Continue(()))
},
)?;
Ok(ControlFlow::Continue(()))
},
)?;
}
let split_words = find_split_words(ctx, original_str.as_str())?;
let self_mut = ctx.term_interner.get_mut(self);