Remove old query_tree code and make clippy happy

2025-10-11 22:26:25 +00:00 · 2023-03-23 09:39:16 +01:00
parent f5f5f03ec0
commit 7169d85115
8 changed files with 82 additions and 1590 deletions
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -1,21 +1,14 @@
 pub use self::facet::{FacetDistribution, Filter, DEFAULT_VALUES_PER_FACET};
-use self::fst_utils::{Complement, Intersection, StartsWith, Union};
 pub use self::matches::{
    FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
 };
 use crate::{
    execute_search, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, SearchContext,
 };
-use fst::automaton::Str;
-use fst::{Automaton, IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
-use std::borrow::Cow;
-use std::collections::hash_map::{Entry, HashMap};
 use std::fmt;
-use std::result::Result as StdResult;
-use std::str::Utf8Error;

 // Building these factories is not free.
 static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
@@ -26,7 +19,6 @@ pub mod facet;
 mod fst_utils;
 mod matches;
 pub mod new;
-mod query_tree;

 pub struct Search<'a> {
    query: Option<String>,
@@ -200,70 +192,6 @@ impl Default for TermsMatchingStrategy {
    }
 }

-pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>;
-
-pub fn word_derivations<'c>(
-    word: &str,
-    is_prefix: bool,
-    max_typo: u8,
-    fst: &fst::Set<Cow<[u8]>>,
-    cache: &'c mut WordDerivationsCache,
-) -> StdResult<&'c [(String, u8)], Utf8Error> {
-    match cache.entry((word.to_string(), is_prefix, max_typo)) {
-        Entry::Occupied(entry) => Ok(entry.into_mut()),
-        Entry::Vacant(entry) => {
-            // println!("word derivations {word} {is_prefix} {max_typo}");
-            let mut derived_words = Vec::new();
-            if max_typo == 0 {
-                if is_prefix {
-                    let prefix = Str::new(word).starts_with();
-                    let mut stream = fst.search(prefix).into_stream();
-
-                    while let Some(word) = stream.next() {
-                        let word = std::str::from_utf8(word)?;
-                        derived_words.push((word.to_string(), 0));
-                    }
-                } else if fst.contains(word) {
-                    derived_words.push((word.to_string(), 0));
-                }
-            } else if max_typo == 1 {
-                let dfa = build_dfa(word, 1, is_prefix);
-                let starts = StartsWith(Str::new(get_first(word)));
-                let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();
-
-                while let Some((word, state)) = stream.next() {
-                    let word = std::str::from_utf8(word)?;
-                    let d = dfa.distance(state.1);
-                    derived_words.push((word.to_string(), d.to_u8()));
-                }
-            } else {
-                let starts = StartsWith(Str::new(get_first(word)));
-                let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
-                let second_dfa = build_dfa(word, 2, is_prefix);
-                let second = Intersection(&second_dfa, &starts);
-                let automaton = Union(first, &second);
-
-                let mut stream = fst.search_with_state(automaton).into_stream();
-
-                while let Some((found_word, state)) = stream.next() {
-                    let found_word = std::str::from_utf8(found_word)?;
-                    // in the case the typo is on the first letter, we know the number of typo
-                    // is two
-                    if get_first(found_word) != get_first(word) {
-                        derived_words.push((found_word.to_string(), 2));
-                    } else {
-                        // Else, we know that it is the second dfa that matched and compute the
-                        // correct distance
-                        let d = second_dfa.distance((state.1).0);
-                        derived_words.push((found_word.to_string(), d.to_u8()));
-                    }
-                }
-            }
-            Ok(entry.insert(derived_words))
-        }
-    }
-}
-
 fn get_first(s: &str) -> &str {
    match s.chars().next() {
        Some(c) => &s[..c.len_utf8()],
@@ -337,66 +265,66 @@ mod test {
        assert!(!search.is_typo_authorized().unwrap());
    }

-    #[test]
-    fn test_one_typos_tolerance() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_one_typos_tolerance() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("zealend", false, 1, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[("zealand".to_string(), 1)]);
-    }
+    //     assert_eq!(found, &[("zealand".to_string(), 1)]);
+    // }

-    #[test]
-    fn test_one_typos_first_letter() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_one_typos_first_letter() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("sealand", false, 1, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[]);
-    }
+    //     assert_eq!(found, &[]);
+    // }

-    #[test]
-    fn test_two_typos_tolerance() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_two_typos_tolerance() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("zealemd", false, 2, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[("zealand".to_string(), 2)]);
-    }
+    //     assert_eq!(found, &[("zealand".to_string(), 2)]);
+    // }

-    #[test]
-    fn test_two_typos_first_letter() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_two_typos_first_letter() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("sealand", false, 2, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[("zealand".to_string(), 2)]);
-    }
+    //     assert_eq!(found, &[("zealand".to_string(), 2)]);
+    // }

-    #[test]
-    fn test_prefix() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_prefix() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("ze", true, 0, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[("zealand".to_string(), 0)]);
-    }
+    //     assert_eq!(found, &[("zealand".to_string(), 0)]);
+    // }

-    #[test]
-    fn test_bad_prefix() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_bad_prefix() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("se", true, 0, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[]);
-    }
+    //     assert_eq!(found, &[]);
+    // }

-    #[test]
-    fn test_prefix_with_typo() {
-        let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
-        let mut cache = HashMap::new();
-        let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();
+    // #[test]
+    // fn test_prefix_with_typo() {
+    //     let fst = fst::Set::from_iter(["zealand"].iter()).unwrap().map_data(Cow::Owned).unwrap();
+    //     let mut cache = HashMap::new();
+    //     let found = word_derivations("zae", true, 1, &fst, &mut cache).unwrap();

-        assert_eq!(found, &[("zealand".to_string(), 1)]);
-    }
+    //     assert_eq!(found, &[("zealand".to_string(), 1)]);
+    // }
 }