Remove old query_tree code and make clippy happy
@@ -1,21 +1,14 @@
pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
pub use self::matches::{
    FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
};
use crate::{
    execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
};
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
use std::borrow::Cow;
use std::collections::hash_map::{Entry, HashMap};
use std::fmt;
use std::result::Result as StdResult;
use std::str::Utf8Error;

// Building these factories is not free.
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
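Aside: the "Building these factories is not free." comment is the reason the Levenshtein automaton builders are memoized in Lazy statics. Below is a minimal sketch of how such cached builders are typically turned into per-query DFAs. Only LEVDIST0 appears in the hunk; LEVDIST1, LEVDIST2, and the helper name build_dfa_sketch are assumptions for illustration (the module's real build_dfa helper is defined elsewhere and is not shown in this diff).

// Sketch only, not part of this commit. LEVDIST1/LEVDIST2 and the helper
// name are assumed for illustration; LEVDIST0 is shown in the hunk above.
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));

fn build_dfa_sketch(word: &str, typos: u8, is_prefix: bool) -> DFA {
    // Reuse the lazily built factories instead of paying the construction
    // cost on every query.
    let builder = match typos {
        0 => &LEVDIST0,
        1 => &LEVDIST1,
        _ => &LEVDIST2,
    };
    if is_prefix {
        builder.build_prefix_dfa(word)
    } else {
        builder.build_dfa(word)
    }
}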
@@ -26,7 +19,6 @@ pub mod facet;
mod fst_utils;
mod matches;
pub mod new;
mod query_tree;

pub struct Search<'a> {
    query: Option<String>,
@@ -200,70 +192,6 @@ impl Default for TermsMatchingStrategy {
    }
}

pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>;

pub fn word_derivations<'c>(
    word: &str,
    is_prefix: bool,
    max_typo: u8,
    fst: &fst::Set<Cow<[u8]>>,
    cache: &'c mut WordDerivationsCache,
) -> StdResult<&'c [(String, u8)], Utf8Error> {
    match cache.entry((word.to_string(), is_prefix, max_typo)) {
        Entry::Occupied(entry) => Ok(entry.into_mut()),
        Entry::Vacant(entry) => {
            // println!("word derivations {word} {is_prefix} {max_typo}");
            let mut derived_words = Vec::new();
            if max_typo == 0 {
                if is_prefix {
                    let prefix = Str::new(word).starts_with();
                    let mut stream = fst.search(prefix).into_stream();

                    while let Some(word) = stream.next() {
                        let word = std::str::from_utf8(word)?;
                        derived_words.push((word.to_string(), 0));
                    }
                } else if fst.contains(word) {
                    derived_words.push((word.to_string(), 0));
                }
            } else if max_typo == 1 {
                let dfa = build_dfa(word, 1, is_prefix);
                let starts = StartsWith(Str::new(get_first(word)));
                let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();

                while let Some((word, state)) = stream.next() {
                    let word = std::str::from_utf8(word)?;
                    let d = dfa.distance(state.1);
                    derived_words.push((word.to_string(), d.to_u8()));
                }
            } else {
                let starts = StartsWith(Str::new(get_first(word)));
                let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
                let second_dfa = build_dfa(word, 2, is_prefix);
                let second = Intersection(&second_dfa, &starts);
                let automaton = Union(first, &second);

                let mut stream = fst.search_with_state(automaton).into_stream();

                while let Some((found_word, state)) = stream.next() {
                    let found_word = std::str::from_utf8(found_word)?;
                    // in the case the typo is on the first letter, we know the number of typos
                    // is two
                    if get_first(found_word) != get_first(word) {
                        derived_words.push((found_word.to_string(), 2));
                    } else {
                        // Else, we know that it is the second dfa that matched and compute the
                        // correct distance
                        let d = second_dfa.distance((state.1).0);
                        derived_words.push((found_word.to_string(), d.to_u8()));
                    }
                }
            }
            Ok(entry.insert(derived_words))
        }
    }
}

fn get_first(s: &str) -> &str {
    match s.chars().next() {
        Some(c) => &s[..c.len_utf8()],
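Aside: the word_derivations helper removed by this hunk resolved a query word into every dictionary word of the fst::Set within the allowed number of typos, memoizing results per (word, is_prefix, max_typo) key. A minimal usage sketch follows, assuming the old word_derivations and WordDerivationsCache shown above are still in scope; the call site itself is illustrative, not taken from the codebase.

// Sketch only: drives the removed helper against a words FST and prints
// each derived word with its typo count.
use std::borrow::Cow;
use std::collections::HashMap;

fn print_derivations(words_fst: &fst::Set<Cow<[u8]>>) -> Result<(), std::str::Utf8Error> {
    let mut cache: WordDerivationsCache = HashMap::new();
    // All dictionary words reachable from "zealand" with at most one typo,
    // also accepting words that merely start with the query (is_prefix = true).
    let found = word_derivations("zealand", true, 1, words_fst, &mut cache)?;
    for (word, typos) in found {
        println!("{word}: {typos} typo(s)");
    }
    Ok(())
}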
@@ -337,66 +265,66 @@ mod test {
        assert!(!search.is_typo_authorized().unwrap());
    }

    #[test]
    fn test_one_typos_tolerance() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_one_typos_tolerance() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();

        assert_eq!(found, &[("zealand".to_string(), 1)]);
    }
    //     assert_eq!(found, &[("zealand".to_string(), 1)]);
    // }

    #[test]
    fn test_one_typos_first_letter() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_one_typos_first_letter() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();

        assert_eq!(found, &[]);
    }
    //     assert_eq!(found, &[]);
    // }

    #[test]
    fn test_two_typos_tolerance() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_two_typos_tolerance() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();

        assert_eq!(found, &[("zealand".to_string(), 2)]);
    }
    //     assert_eq!(found, &[("zealand".to_string(), 2)]);
    // }

    #[test]
    fn test_two_typos_first_letter() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_two_typos_first_letter() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();

        assert_eq!(found, &[("zealand".to_string(), 2)]);
    }
    //     assert_eq!(found, &[("zealand".to_string(), 2)]);
    // }

    #[test]
    fn test_prefix() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_prefix() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();

        assert_eq!(found, &[("zealand".to_string(), 0)]);
    }
    //     assert_eq!(found, &[("zealand".to_string(), 0)]);
    // }

    #[test]
    fn test_bad_prefix() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_bad_prefix() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();

        assert_eq!(found, &[]);
    }
    //     assert_eq!(found, &[]);
    // }

    #[test]
    fn test_prefix_with_typo() {
        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
        let mut cache = HashMap::new();
        let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();
    // #[test]
    // fn test_prefix_with_typo() {
    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
    //     let mut cache = HashMap::new();
    //     let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();

        assert_eq!(found, &[("zealand".to_string(), 1)]);
    }
    //     assert_eq!(found, &[("zealand".to_string(), 1)]);
    // }
}