Merge branch 'main' into word-pair-proximity-docids-refactor

This commit is contained in:
Loïc Lecrenier
2022-10-24 15:23:00 +02:00
23 changed files with 135 additions and 154 deletions

View File

@ -7,9 +7,9 @@ use serde::Serialize;
pub mod matching_words;
const DEFAULT_CROP_MARKER: &'static str = "";
const DEFAULT_HIGHLIGHT_PREFIX: &'static str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &'static str = "</em>";
const DEFAULT_CROP_MARKER: &str = "";
const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
/// Structure used to build a Matcher allowing to customize formating tags.
pub struct MatcherBuilder<'a, A> {

View File

@ -357,39 +357,36 @@ pub fn word_derivations<'c>(
} else if fst.contains(word) {
derived_words.push((word.to_string(), 0));
}
} else if max_typo == 1 {
let dfa = build_dfa(word, 1, is_prefix);
let starts = StartsWith(Str::new(get_first(word)));
let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();
while let Some((word, state)) = stream.next() {
let word = std::str::from_utf8(word)?;
let d = dfa.distance(state.1);
derived_words.push((word.to_string(), d.to_u8()));
}
} else {
if max_typo == 1 {
let dfa = build_dfa(word, 1, is_prefix);
let starts = StartsWith(Str::new(get_first(word)));
let mut stream =
fst.search_with_state(Intersection(starts, &dfa)).into_stream();
let starts = StartsWith(Str::new(get_first(word)));
let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
let second_dfa = build_dfa(word, 2, is_prefix);
let second = Intersection(&second_dfa, &starts);
let automaton = Union(first, &second);
while let Some((word, state)) = stream.next() {
let word = std::str::from_utf8(word)?;
let d = dfa.distance(state.1);
derived_words.push((word.to_string(), d.to_u8()));
}
} else {
let starts = StartsWith(Str::new(get_first(word)));
let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
let second_dfa = build_dfa(word, 2, is_prefix);
let second = Intersection(&second_dfa, &starts);
let automaton = Union(first, &second);
let mut stream = fst.search_with_state(automaton).into_stream();
let mut stream = fst.search_with_state(automaton).into_stream();
while let Some((found_word, state)) = stream.next() {
let found_word = std::str::from_utf8(found_word)?;
// in the case the typo is on the first letter, we know the number of typo
// is two
if get_first(found_word) != get_first(word) {
derived_words.push((found_word.to_string(), 2));
} else {
// Else, we know that it is the second dfa that matched and compute the
// correct distance
let d = second_dfa.distance((state.1).0);
derived_words.push((found_word.to_string(), d.to_u8()));
}
while let Some((found_word, state)) = stream.next() {
let found_word = std::str::from_utf8(found_word)?;
// in the case the typo is on the first letter, we know the number of typo
// is two
if get_first(found_word) != get_first(word) {
derived_words.push((found_word.to_string(), 2));
} else {
// Else, we know that it is the second dfa that matched and compute the
// correct distance
let d = second_dfa.distance((state.1).0);
derived_words.push((found_word.to_string(), d.to_u8()));
}
}
}