mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	clean warnings
This commit is contained in:
		| @@ -1,7 +1,7 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use crate::Index; | ||||
| use crate::search::word_typos; | ||||
| use crate::search::word_derivations; | ||||
|  | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| @@ -124,7 +124,7 @@ fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result<RoaringBitma | ||||
|             if query.prefix && ctx.in_prefix_cache(&word) { | ||||
|                 Ok(ctx.word_prefix_docids(&word)?.unwrap_or_default()) | ||||
|             } else if query.prefix { | ||||
|                 let words = word_typos(&word, true, 0, ctx.words_fst())?; | ||||
|                 let words = word_derivations(&word, true, 0, ctx.words_fst())?; | ||||
|                 let mut docids = RoaringBitmap::new(); | ||||
|                 for (word, _typo) in words { | ||||
|                     let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); | ||||
| @@ -136,7 +136,7 @@ fn query_docids(ctx: &dyn Context, query: &Query) -> anyhow::Result<RoaringBitma | ||||
|             } | ||||
|         }, | ||||
|         QueryKind::Tolerant { typo, word } => { | ||||
|             let words = word_typos(&word, query.prefix, *typo, ctx.words_fst())?; | ||||
|             let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst())?; | ||||
|             let mut docids = RoaringBitmap::new(); | ||||
|             for (word, _typo) in words { | ||||
|                 let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); | ||||
| @@ -155,14 +155,14 @@ fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, p | ||||
|             if prefix && ctx.in_prefix_cache(&right) { | ||||
|                 Ok(ctx.word_prefix_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default()) | ||||
|             } else if prefix { | ||||
|                 let r_words = word_typos(&right, true, 0, ctx.words_fst())?; | ||||
|                 let r_words = word_derivations(&right, true, 0, ctx.words_fst())?; | ||||
|                 all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity) | ||||
|             } else { | ||||
|                 Ok(ctx.word_pair_proximity_docids(left.as_str(), right.as_str(), proximity)?.unwrap_or_default()) | ||||
|             } | ||||
|         }, | ||||
|         (QueryKind::Tolerant { typo, word: left }, QueryKind::Exact { word: right, .. }) => { | ||||
|             let l_words = word_typos(&left, false, *typo, ctx.words_fst())?; | ||||
|             let l_words = word_derivations(&left, false, *typo, ctx.words_fst())?; | ||||
|             if prefix && ctx.in_prefix_cache(&right) { | ||||
|                 let mut docids = RoaringBitmap::new(); | ||||
|                 for (left, _) in l_words { | ||||
| @@ -171,19 +171,19 @@ fn query_pair_proximity_docids(ctx: &dyn Context, left: &Query, right: &Query, p | ||||
|                 } | ||||
|                 Ok(docids) | ||||
|             } else if prefix { | ||||
|                 let r_words = word_typos(&right, true, 0, ctx.words_fst())?; | ||||
|                 let r_words = word_derivations(&right, true, 0, ctx.words_fst())?; | ||||
|                 all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity) | ||||
|             } else { | ||||
|                 all_word_pair_proximity_docids(ctx, &l_words, &[(right, 0)], proximity) | ||||
|             } | ||||
|         }, | ||||
|         (QueryKind::Exact { word: left, .. }, QueryKind::Tolerant { typo, word: right }) => { | ||||
|             let r_words = word_typos(&right, prefix, *typo, ctx.words_fst())?; | ||||
|             let r_words = word_derivations(&right, prefix, *typo, ctx.words_fst())?; | ||||
|             all_word_pair_proximity_docids(ctx, &[(left, 0)], &r_words, proximity) | ||||
|         }, | ||||
|         (QueryKind::Tolerant { typo: l_typo, word: left }, QueryKind::Tolerant { typo: r_typo, word: right }) => { | ||||
|             let l_words = word_typos(&left, false, *l_typo, ctx.words_fst())?; | ||||
|             let r_words = word_typos(&right, prefix, *r_typo, ctx.words_fst())?; | ||||
|             let l_words = word_derivations(&left, false, *l_typo, ctx.words_fst())?; | ||||
|             let r_words = word_derivations(&right, prefix, *r_typo, ctx.words_fst())?; | ||||
|             all_word_pair_proximity_docids(ctx, &l_words, &r_words, proximity) | ||||
|         }, | ||||
|     } | ||||
|   | ||||
| @@ -4,7 +4,7 @@ use anyhow::bail; | ||||
| use roaring::RoaringBitmap; | ||||
|  | ||||
| use crate::search::query_tree::{Operation, Query, QueryKind}; | ||||
| use crate::search::word_typos; | ||||
| use crate::search::word_derivations; | ||||
| use super::{Candidates, Criterion, CriterionResult, Context, query_docids, query_pair_proximity_docids}; | ||||
|  | ||||
| // FIXME we must stop when the number of typos is equal to | ||||
| @@ -177,7 +177,7 @@ fn alterate_query_tree( | ||||
|             }, | ||||
|             Operation::Query(q) => { | ||||
|                 if let QueryKind::Tolerant { typo, word } = &q.kind { | ||||
|                     // if no typo is allowed we don't call word_typos(..), | ||||
|                     // if no typo is allowed we don't call word_derivations function, | ||||
|                     // and directly create an Exact query | ||||
|                     if number_typos == 0 { | ||||
|                         *operation = Operation::Query(Query { | ||||
| @@ -190,7 +190,7 @@ fn alterate_query_tree( | ||||
|                         let words = if let Some(derivations) = typo_cache.get(&cache_key) { | ||||
|                             derivations.clone() | ||||
|                         } else { | ||||
|                             let derivations = word_typos(word, q.prefix, typo, words_fst)?; | ||||
|                             let derivations = word_derivations(word, q.prefix, typo, words_fst)?; | ||||
|                             typo_cache.insert(cache_key, derivations.clone()); | ||||
|                             derivations | ||||
|                         }; | ||||
| @@ -222,10 +222,6 @@ fn resolve_candidates<'t>( | ||||
|     cache: &mut HashMap<(Operation, u8), RoaringBitmap>, | ||||
| ) -> anyhow::Result<RoaringBitmap> | ||||
| { | ||||
|     // FIXME add a cache | ||||
|     // FIXME keep the cache between typos iterations | ||||
|     // cache: HashMap<(&Operation, u8), RoaringBitmap>, | ||||
|  | ||||
|     fn resolve_operation<'t>( | ||||
|         ctx: &'t dyn Context, | ||||
|         query_tree: &Operation, | ||||
|   | ||||
| @@ -1,26 +1,18 @@ | ||||
| use std::borrow::Cow; | ||||
| use std::collections::{HashMap, HashSet}; | ||||
| use std::collections::HashSet; | ||||
| use std::fmt; | ||||
| use std::time::Instant; | ||||
|  | ||||
| use anyhow::{bail, Context}; | ||||
| use fst::{IntoStreamer, Streamer, Set}; | ||||
| use levenshtein_automata::DFA; | ||||
| use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; | ||||
| use log::debug; | ||||
| use meilisearch_tokenizer::{AnalyzerConfig, Analyzer}; | ||||
| use once_cell::sync::Lazy; | ||||
| use ordered_float::OrderedFloat; | ||||
| use roaring::bitmap::RoaringBitmap; | ||||
|  | ||||
| use crate::facet::FacetType; | ||||
| use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetLevelValueI64Codec}; | ||||
| use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetI64Codec}; | ||||
| use crate::mdfs::Mdfs; | ||||
| use crate::query_tokens::{query_tokens, QueryToken}; | ||||
| use crate::search::criteria::{Criterion, CriterionResult}; | ||||
| use crate::search::criteria::typo::Typo; | ||||
| use crate::{Index, FieldId, DocumentId}; | ||||
| use crate::{Index, DocumentId}; | ||||
|  | ||||
| pub use self::facet::{FacetCondition, FacetDistribution, FacetNumberOperator, FacetStringOperator}; | ||||
| pub use self::facet::{FacetIter}; | ||||
| @@ -69,198 +61,6 @@ impl<'a> Search<'a> { | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     /// Extracts the query words from the query string and returns the DFAs accordingly. | ||||
|     /// TODO introduce settings for the number of typos regarding the words lengths. | ||||
|     fn generate_query_dfas(query: &str) -> Vec<(String, bool, DFA)> { | ||||
|         let (lev0, lev1, lev2) = (&LEVDIST0, &LEVDIST1, &LEVDIST2); | ||||
|  | ||||
|         let stop_words = Set::default(); | ||||
|         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stop_words)); | ||||
|         let analyzed = analyzer.analyze(query); | ||||
|         let tokens = analyzed.tokens(); | ||||
|         let words: Vec<_> = query_tokens(tokens).collect(); | ||||
|  | ||||
|         let ends_with_whitespace = query.chars().last().map_or(false, char::is_whitespace); | ||||
|         let number_of_words = words.len(); | ||||
|  | ||||
|         words.into_iter().enumerate().map(|(i, word)| { | ||||
|             let (word, quoted) = match word { | ||||
|                 QueryToken::Free(token) => (token.text().to_string(), token.text().len() <= 3), | ||||
|                 QueryToken::Quoted(token) => (token.text().to_string(), true), | ||||
|             }; | ||||
|             let is_last = i + 1 == number_of_words; | ||||
|             let is_prefix = is_last && !ends_with_whitespace && !quoted; | ||||
|             let lev = match word.len() { | ||||
|                 0..=4 => if quoted { lev0 } else { lev0 }, | ||||
|                 5..=8 => if quoted { lev0 } else { lev1 }, | ||||
|                 _     => if quoted { lev0 } else { lev2 }, | ||||
|             }; | ||||
|  | ||||
|             let dfa = if is_prefix { | ||||
|                 lev.build_prefix_dfa(&word) | ||||
|             } else { | ||||
|                 lev.build_dfa(&word) | ||||
|             }; | ||||
|  | ||||
|             (word, is_prefix, dfa) | ||||
|         }) | ||||
|         .collect() | ||||
|     } | ||||
|  | ||||
|     /// Fetch the words from the given FST related to the given DFAs along with | ||||
|     /// the associated documents ids. | ||||
|     fn fetch_words_docids( | ||||
|         &self, | ||||
|         fst: &fst::Set<Cow<[u8]>>, | ||||
|         dfas: Vec<(String, bool, DFA)>, | ||||
|     ) -> anyhow::Result<Vec<(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)>> | ||||
|     { | ||||
|         // A Vec storing all the derived words from the original query words, associated | ||||
|         // with the distance from the original word and the docids where each word appears. | ||||
|         let mut derived_words = Vec::<(HashMap::<String, (u8, RoaringBitmap)>, RoaringBitmap)>::with_capacity(dfas.len()); | ||||
|  | ||||
|         for (_word, _is_prefix, dfa) in dfas { | ||||
|  | ||||
|             let mut acc_derived_words = HashMap::new(); | ||||
|             let mut unions_docids = RoaringBitmap::new(); | ||||
|             let mut stream = fst.search_with_state(&dfa).into_stream(); | ||||
|             while let Some((word, state)) = stream.next() { | ||||
|  | ||||
|                 let word = std::str::from_utf8(word)?; | ||||
|                 let docids = self.index.word_docids.get(self.rtxn, word)?.unwrap(); | ||||
|                 let distance = dfa.distance(state); | ||||
|                 unions_docids.union_with(&docids); | ||||
|                 acc_derived_words.insert(word.to_string(), (distance.to_u8(), docids)); | ||||
|             } | ||||
|             derived_words.push((acc_derived_words, unions_docids)); | ||||
|         } | ||||
|  | ||||
|         Ok(derived_words) | ||||
|     } | ||||
|  | ||||
|     /// Returns the set of docids that contains all of the query words. | ||||
|     fn compute_candidates( | ||||
|         derived_words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)], | ||||
|     ) -> RoaringBitmap | ||||
|     { | ||||
|         // We sort the derived words by inverse popularity, this way intersections are faster. | ||||
|         let mut derived_words: Vec<_> = derived_words.iter().collect(); | ||||
|         derived_words.sort_unstable_by_key(|(_, docids)| docids.len()); | ||||
|  | ||||
|         // We take the union of the docids of all the derived words of each query word, | ||||
|         // which gives N unions (N being the number of original query words), then we intersect them. | ||||
|         let mut candidates = RoaringBitmap::new(); | ||||
|  | ||||
|         for (i, (_, union_docids)) in derived_words.iter().enumerate() { | ||||
|             if i == 0 { | ||||
|                 candidates = union_docids.clone(); | ||||
|             } else { | ||||
|                 candidates.intersect_with(&union_docids); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         candidates | ||||
|     } | ||||
|  | ||||
|     fn facet_ordered( | ||||
|         &self, | ||||
|         field_id: FieldId, | ||||
|         facet_type: FacetType, | ||||
|         ascending: bool, | ||||
|         mut documents_ids: RoaringBitmap, | ||||
|         limit: usize, | ||||
|     ) -> anyhow::Result<Vec<DocumentId>> | ||||
|     { | ||||
|         let mut output: Vec<_> = match facet_type { | ||||
|             FacetType::Float => { | ||||
|                 if documents_ids.len() <= 1000 { | ||||
|                     let db = self.index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetF64Codec>(); | ||||
|                     let mut docids_values = Vec::with_capacity(documents_ids.len() as usize); | ||||
|                     for docid in documents_ids.iter() { | ||||
|                         let left = (field_id, docid, f64::MIN); | ||||
|                         let right = (field_id, docid, f64::MAX); | ||||
|                         let mut iter = db.range(self.rtxn, &(left..=right))?; | ||||
|                         let entry = if ascending { iter.next() } else { iter.last() }; | ||||
|                         if let Some(((_, _, value), ())) = entry.transpose()? { | ||||
|                             docids_values.push((docid, OrderedFloat(value))); | ||||
|                         } | ||||
|                     } | ||||
|                     docids_values.sort_unstable_by_key(|(_, value)| *value); | ||||
|                     let iter = docids_values.into_iter().map(|(id, _)| id); | ||||
|                     if ascending { | ||||
|                         iter.take(limit).collect() | ||||
|                     } else { | ||||
|                         iter.rev().take(limit).collect() | ||||
|                     } | ||||
|                 } else { | ||||
|                     let facet_fn = if ascending { | ||||
|                         FacetIter::<f64, FacetLevelValueF64Codec>::new_reducing | ||||
|                     } else { | ||||
|                         FacetIter::<f64, FacetLevelValueF64Codec>::new_reverse_reducing | ||||
|                     }; | ||||
|                     let mut limit_tmp = limit; | ||||
|                     let mut output = Vec::new(); | ||||
|                     for result in facet_fn(self.rtxn, self.index, field_id, documents_ids.clone())? { | ||||
|                         let (_val, docids) = result?; | ||||
|                         limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); | ||||
|                         output.push(docids); | ||||
|                         if limit_tmp == 0 { break } | ||||
|                     } | ||||
|                     output.into_iter().flatten().take(limit).collect() | ||||
|                 } | ||||
|             }, | ||||
|             FacetType::Integer => { | ||||
|                 if documents_ids.len() <= 1000 { | ||||
|                     let db = self.index.field_id_docid_facet_values.remap_key_type::<FieldDocIdFacetI64Codec>(); | ||||
|                     let mut docids_values = Vec::with_capacity(documents_ids.len() as usize); | ||||
|                     for docid in documents_ids.iter() { | ||||
|                         let left = (field_id, docid, i64::MIN); | ||||
|                         let right = (field_id, docid, i64::MAX); | ||||
|                         let mut iter = db.range(self.rtxn, &(left..=right))?; | ||||
|                         let entry = if ascending { iter.next() } else { iter.last() }; | ||||
|                         if let Some(((_, _, value), ())) = entry.transpose()? { | ||||
|                             docids_values.push((docid, value)); | ||||
|                         } | ||||
|                     } | ||||
|                     docids_values.sort_unstable_by_key(|(_, value)| *value); | ||||
|                     let iter = docids_values.into_iter().map(|(id, _)| id); | ||||
|                     if ascending { | ||||
|                         iter.take(limit).collect() | ||||
|                     } else { | ||||
|                         iter.rev().take(limit).collect() | ||||
|                     } | ||||
|                 } else { | ||||
|                     let facet_fn = if ascending { | ||||
|                         FacetIter::<i64, FacetLevelValueI64Codec>::new_reducing | ||||
|                     } else { | ||||
|                         FacetIter::<i64, FacetLevelValueI64Codec>::new_reverse_reducing | ||||
|                     }; | ||||
|                     let mut limit_tmp = limit; | ||||
|                     let mut output = Vec::new(); | ||||
|                     for result in facet_fn(self.rtxn, self.index, field_id, documents_ids.clone())? { | ||||
|                         let (_val, docids) = result?; | ||||
|                         limit_tmp = limit_tmp.saturating_sub(docids.len() as usize); | ||||
|                         output.push(docids); | ||||
|                         if limit_tmp == 0 { break } | ||||
|                     } | ||||
|                     output.into_iter().flatten().take(limit).collect() | ||||
|                 } | ||||
|             }, | ||||
|             FacetType::String => bail!("criteria facet type must be a number"), | ||||
|         }; | ||||
|  | ||||
|         // if there aren't enough documents to return we try to complete that list | ||||
|         // with documents that are maybe not faceted under this field and therefore | ||||
|         // not returned by the previous facet iteration. | ||||
|         if output.len() < limit { | ||||
|             output.iter().for_each(|n| { documents_ids.remove(*n); }); | ||||
|             let remaining = documents_ids.iter().take(limit - output.len()); | ||||
|             output.extend(remaining); | ||||
|         } | ||||
|  | ||||
|         Ok(output) | ||||
|     } | ||||
|  | ||||
|     pub fn execute(&self) -> anyhow::Result<SearchResult> { | ||||
|         // We create the query tree by splitting the query into tokens. | ||||
|         let before = Instant::now(); | ||||
| @@ -320,101 +120,6 @@ impl<'a> Search<'a> { | ||||
|  | ||||
|         let found_words = HashSet::new(); | ||||
|         Ok(SearchResult { found_words, candidates: initial_candidates, documents_ids }) | ||||
|  | ||||
|         // let order_by_facet = { | ||||
|         //     let criteria = self.index.criteria(self.rtxn)?; | ||||
|         //     let result = criteria.into_iter().flat_map(|criterion| { | ||||
|         //         match criterion { | ||||
|         //             Criterion::Asc(fid) => Some((fid, true)), | ||||
|         //             Criterion::Desc(fid) => Some((fid, false)), | ||||
|         //             _ => None | ||||
|         //         } | ||||
|         //     }).next(); | ||||
|         //     match result { | ||||
|         //         Some((attr_name, is_ascending)) => { | ||||
|         //             let field_id_map = self.index.fields_ids_map(self.rtxn)?; | ||||
|         //             let fid = field_id_map.id(&attr_name).with_context(|| format!("unknown field: {:?}", attr_name))?; | ||||
|         //             let faceted_fields = self.index.faceted_fields_ids(self.rtxn)?; | ||||
|         //             let ftype = *faceted_fields.get(&fid) | ||||
|         //                 .with_context(|| format!("{:?} not found in the faceted fields.", attr_name)) | ||||
|         //                 .expect("corrupted data: "); | ||||
|         //             Some((fid, ftype, is_ascending)) | ||||
|         //         }, | ||||
|         //         None => None, | ||||
|         //     } | ||||
|         // }; | ||||
|  | ||||
|         // let before = Instant::now(); | ||||
|         // let (candidates, derived_words) = match (facet_candidates, derived_words) { | ||||
|         //     (Some(mut facet_candidates), Some(derived_words)) => { | ||||
|         //         let words_candidates = Self::compute_candidates(&derived_words); | ||||
|         //         facet_candidates.intersect_with(&words_candidates); | ||||
|         //         (facet_candidates, derived_words) | ||||
|         //     }, | ||||
|         //     (None, Some(derived_words)) => { | ||||
|         //         (Self::compute_candidates(&derived_words), derived_words) | ||||
|         //     }, | ||||
|         //     (Some(facet_candidates), None) => { | ||||
|         //         // If the query is not set or results in no DFAs but | ||||
|         //         // there are some facet conditions we return a placeholder. | ||||
|         //         let documents_ids = match order_by_facet { | ||||
|         //             Some((fid, ftype, is_ascending)) => { | ||||
|         //                 self.facet_ordered(fid, ftype, is_ascending, facet_candidates.clone(), limit)? | ||||
|         //             }, | ||||
|         //             None => facet_candidates.iter().take(limit).collect(), | ||||
|         //         }; | ||||
|         //         return Ok(SearchResult { | ||||
|         //             documents_ids, | ||||
|         //             candidates: facet_candidates, | ||||
|         //             ..Default::default() | ||||
|         //         }) | ||||
|         //     }, | ||||
|         //     (None, None) => { | ||||
|         //         // If the query is not set or results in no DFAs we return a placeholder. | ||||
|         //         let all_docids = self.index.documents_ids(self.rtxn)?; | ||||
|         //         let documents_ids = match order_by_facet { | ||||
|         //             Some((fid, ftype, is_ascending)) => { | ||||
|         //                 self.facet_ordered(fid, ftype, is_ascending, all_docids.clone(), limit)? | ||||
|         //             }, | ||||
|         //             None => all_docids.iter().take(limit).collect(), | ||||
|         //         }; | ||||
|         //         return Ok(SearchResult { documents_ids, candidates: all_docids,..Default::default() }) | ||||
|         //     }, | ||||
|         // }; | ||||
|  | ||||
|         // debug!("candidates: {:?} took {:.02?}", candidates, before.elapsed()); | ||||
|  | ||||
|         // // The mana depth first search is a revised DFS that explores | ||||
|         // // solutions in the order of their proximities. | ||||
|         // let mut mdfs = Mdfs::new(self.index, self.rtxn, &derived_words, candidates.clone()); | ||||
|         // let mut documents = Vec::new(); | ||||
|  | ||||
|         // // We execute the Mdfs iterator until we find enough documents. | ||||
|         // while documents.iter().map(RoaringBitmap::len).sum::<u64>() < limit as u64 { | ||||
|         //     match mdfs.next().transpose()? { | ||||
|         //         Some((proximity, answer)) => { | ||||
|         //             debug!("answer with a proximity of {}: {:?}", proximity, answer); | ||||
|         //             documents.push(answer); | ||||
|         //         }, | ||||
|         //         None => break, | ||||
|         //     } | ||||
|         // } | ||||
|  | ||||
|         // let found_words = derived_words.into_iter().flat_map(|(w, _)| w).map(|(w, _)| w).collect(); | ||||
|         // let documents_ids = match order_by_facet { | ||||
|         //     Some((fid, ftype, order)) => { | ||||
|         //         let mut ordered_documents = Vec::new(); | ||||
|         //         for documents_ids in documents { | ||||
|         //             let docids = self.facet_ordered(fid, ftype, order, documents_ids, limit)?; | ||||
|         //             ordered_documents.push(docids); | ||||
|         //             if ordered_documents.iter().map(Vec::len).sum::<usize>() >= limit { break } | ||||
|         //         } | ||||
|         //         ordered_documents.into_iter().flatten().take(limit).collect() | ||||
|         //     }, | ||||
|         //     None => documents.into_iter().flatten().take(limit).collect(), | ||||
|         // }; | ||||
|  | ||||
|         // Ok(SearchResult { found_words, candidates, documents_ids }) | ||||
|     } | ||||
| } | ||||
|  | ||||
| @@ -438,19 +143,17 @@ pub struct SearchResult { | ||||
|     pub documents_ids: Vec<DocumentId>, | ||||
| } | ||||
|  | ||||
| pub fn word_typos(word: &str, is_prefix: bool, max_typo: u8, fst: &fst::Set<Cow<[u8]>>) -> anyhow::Result<Vec<(String, u8)>> { | ||||
|     let dfa = { | ||||
|         let lev = match max_typo { | ||||
|             0 => &LEVDIST0, | ||||
|             1 => &LEVDIST1, | ||||
|             _ => &LEVDIST2, | ||||
|         }; | ||||
| pub fn word_derivations(word: &str, is_prefix: bool, max_typo: u8, fst: &fst::Set<Cow<[u8]>>) -> anyhow::Result<Vec<(String, u8)>> { | ||||
|     let lev = match max_typo { | ||||
|         0 => &LEVDIST0, | ||||
|         1 => &LEVDIST1, | ||||
|         _ => &LEVDIST2, | ||||
|     }; | ||||
|  | ||||
|         if is_prefix { | ||||
|             lev.build_prefix_dfa(&word) | ||||
|         } else { | ||||
|             lev.build_dfa(&word) | ||||
|         } | ||||
|     let dfa = if is_prefix { | ||||
|         lev.build_prefix_dfa(&word) | ||||
|     } else { | ||||
|         lev.build_dfa(&word) | ||||
|     }; | ||||
|  | ||||
|     let mut derived_words = Vec::new(); | ||||
|   | ||||
| @@ -303,7 +303,7 @@ fn fetch_words(tree: &Operation, fst: &fst::Set<Cow<[u8]>>) -> FetchedWords { | ||||
|         match query.kind.clone() { | ||||
|             QueryKind::Exact { word, .. } => vec![(word, query.prefix)], | ||||
|             QueryKind::Tolerant { typo, word } => { | ||||
|                 if let Ok(words) = super::word_typos(&word, query.prefix, typo, fst) { | ||||
|                 if let Ok(words) = super::word_derivations(&word, query.prefix, typo, fst) { | ||||
|                     words.into_iter().map(|(w, _)| (w, query.prefix)).collect() | ||||
|                 } else { | ||||
|                     vec![(word, query.prefix)] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user