mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-24 20:46:27 +00:00 
			
		
		
		
	Merge #227
227: Replace Consecutive by Phrase in query tree r=Kerollmops a=ManyTheFish Replace `Consecutive` by `Phrase` in the query tree in order to remove theoretical bugs, due to the `Consecutive` enum type. Co-authored-by: many <maxime@meilisearch.com> Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
		| @@ -578,7 +578,6 @@ fn linear_compute_candidates( | |||||||
|     fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 { |     fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 { | ||||||
|         let mut min_rank = u64::max_value(); |         let mut min_rank = u64::max_value(); | ||||||
|         for branch in branches { |         for branch in branches { | ||||||
|  |  | ||||||
|             let branch_len = branch.len(); |             let branch_len = branch.len(); | ||||||
|             let mut branch_rank = Vec::with_capacity(branch_len); |             let mut branch_rank = Vec::with_capacity(branch_len); | ||||||
|             for derivates in branch { |             for derivates in branch { | ||||||
| @@ -661,7 +660,7 @@ fn linear_compute_candidates( | |||||||
|  |  | ||||||
| // TODO can we keep refs of Query | // TODO can we keep refs of Query | ||||||
| fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree { | fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree { | ||||||
|     use crate::search::criteria::Operation::{And, Or, Consecutive}; |     use crate::search::criteria::Operation::{And, Or, Phrase}; | ||||||
|  |  | ||||||
|     fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree { |     fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree { | ||||||
|         match tail.split_first() { |         match tail.split_first() { | ||||||
| @@ -683,7 +682,7 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree { | |||||||
|  |  | ||||||
|     fn recurse(op: &Operation) -> FlattenedQueryTree { |     fn recurse(op: &Operation) -> FlattenedQueryTree { | ||||||
|         match op { |         match op { | ||||||
|             And(ops) | Consecutive(ops) => { |             And(ops) => { | ||||||
|                 ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)) |                 ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)) | ||||||
|             }, |             }, | ||||||
|             Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) { |             Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) { | ||||||
| @@ -691,6 +690,12 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree { | |||||||
|             } else { |             } else { | ||||||
|                 ops.iter().map(recurse).flatten().collect() |                 ops.iter().map(recurse).flatten().collect() | ||||||
|             }, |             }, | ||||||
|  |             Phrase(words) => { | ||||||
|  |                 let queries = words.iter().map(|word| { | ||||||
|  |                     vec![Query {prefix: false, kind: QueryKind::exact(word.clone())}] | ||||||
|  |                 }).collect(); | ||||||
|  |                 vec![queries] | ||||||
|  |             } | ||||||
|             Operation::Query(query) => vec![vec![vec![query.clone()]]], |             Operation::Query(query) => vec![vec![vec![query.clone()]]], | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|   | |||||||
| @@ -1,7 +1,6 @@ | |||||||
| use std::collections::HashMap; | use std::collections::HashMap; | ||||||
| use std::borrow::Cow; | use std::borrow::Cow; | ||||||
|  |  | ||||||
| use anyhow::bail; |  | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}}; | use crate::{FieldId, TreeLevel, search::{word_derivations, WordDerivationsCache}}; | ||||||
| @@ -239,7 +238,7 @@ pub fn resolve_query_tree<'t>( | |||||||
|         wdcache: &mut WordDerivationsCache, |         wdcache: &mut WordDerivationsCache, | ||||||
|     ) -> anyhow::Result<RoaringBitmap> |     ) -> anyhow::Result<RoaringBitmap> | ||||||
|     { |     { | ||||||
|         use Operation::{And, Consecutive, Or, Query}; |         use Operation::{And, Phrase, Or, Query}; | ||||||
|  |  | ||||||
|         match query_tree { |         match query_tree { | ||||||
|             And(ops) => { |             And(ops) => { | ||||||
| @@ -261,26 +260,23 @@ pub fn resolve_query_tree<'t>( | |||||||
|                 } |                 } | ||||||
|                 Ok(candidates) |                 Ok(candidates) | ||||||
|             }, |             }, | ||||||
|             Consecutive(ops) => { |             Phrase(words) => { | ||||||
|                 let mut candidates = RoaringBitmap::new(); |                 let mut candidates = RoaringBitmap::new(); | ||||||
|                 let mut first_loop = true; |                 let mut first_loop = true; | ||||||
|                 for slice in ops.windows(2) { |                 for slice in words.windows(2) { | ||||||
|                     match (&slice[0], &slice[1]) { |                     let (left, right) = (&slice[0], &slice[1]); | ||||||
|                         (Operation::Query(left), Operation::Query(right)) => { |                     match ctx.word_pair_proximity_docids(left, right, 1)? { | ||||||
|                             match query_pair_proximity_docids(ctx, left, right, 1, wdcache)? { |                         Some(pair_docids) => { | ||||||
|                                 pair_docids if pair_docids.is_empty() => { |                             if pair_docids.is_empty() { | ||||||
|                                     return Ok(RoaringBitmap::new()) |                                 return Ok(RoaringBitmap::new()); | ||||||
|                                 }, |                             } else if first_loop { | ||||||
|                                 pair_docids if first_loop => { |                                 candidates = pair_docids; | ||||||
|                                     candidates = pair_docids; |                                 first_loop = false; | ||||||
|                                     first_loop = false; |                             } else { | ||||||
|                                 }, |                                 candidates &= pair_docids; | ||||||
|                                 pair_docids => { |  | ||||||
|                                     candidates.intersect_with(&pair_docids); |  | ||||||
|                                 }, |  | ||||||
|                             } |                             } | ||||||
|                         }, |                         }, | ||||||
|                         _ => bail!("invalid consecutive query type"), |                         None => return Ok(RoaringBitmap::new()) | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 Ok(candidates) |                 Ok(candidates) | ||||||
|   | |||||||
| @@ -171,12 +171,33 @@ fn resolve_candidates<'t>( | |||||||
|         wdcache: &mut WordDerivationsCache, |         wdcache: &mut WordDerivationsCache, | ||||||
|     ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> |     ) -> anyhow::Result<Vec<(Query, Query, RoaringBitmap)>> | ||||||
|     { |     { | ||||||
|         use Operation::{And, Consecutive, Or, Query}; |         use Operation::{And, Phrase, Or}; | ||||||
|  |  | ||||||
|         let result = match query_tree { |         let result = match query_tree { | ||||||
|             And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?, |             And(ops) => mdfs(ctx, ops, proximity, cache, wdcache)?, | ||||||
|             Consecutive(ops) => if proximity == 0 { |             Phrase(words) => if proximity == 0 { | ||||||
|                 mdfs(ctx, ops, 0, cache, wdcache)? |                 let most_left = words.first().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) }); | ||||||
|  |                 let most_right = words.last().map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) }); | ||||||
|  |                 let mut candidates = None; | ||||||
|  |                 for slice in words.windows(2) { | ||||||
|  |                     let (left, right) = (&slice[0], &slice[1]); | ||||||
|  |                     match ctx.word_pair_proximity_docids(left, right, 1)? { | ||||||
|  |                         Some(pair_docids) => { | ||||||
|  |                             match candidates.as_mut() { | ||||||
|  |                                 Some(candidates) => *candidates &= pair_docids, | ||||||
|  |                                 None => candidates = Some(pair_docids), | ||||||
|  |                             } | ||||||
|  |                         }, | ||||||
|  |                         None => { | ||||||
|  |                             candidates = None; | ||||||
|  |                             break; | ||||||
|  |                         } | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |                 match (most_left, most_right, candidates) { | ||||||
|  |                     (Some(l), Some(r), Some(c)) => vec![(l, r, c)], | ||||||
|  |                     _otherwise => Default::default(), | ||||||
|  |                 } | ||||||
|             } else { |             } else { | ||||||
|                 Default::default() |                 Default::default() | ||||||
|             }, |             }, | ||||||
| @@ -188,7 +209,7 @@ fn resolve_candidates<'t>( | |||||||
|                 } |                 } | ||||||
|                 output |                 output | ||||||
|             }, |             }, | ||||||
|             Query(q) => if proximity == 0 { |             Operation::Query(q) => if proximity == 0 { | ||||||
|                 let candidates = query_docids(ctx, q, wdcache)?; |                 let candidates = query_docids(ctx, q, wdcache)?; | ||||||
|                 vec![(q.clone(), q.clone(), candidates)] |                 vec![(q.clone(), q.clone(), candidates)] | ||||||
|             } else { |             } else { | ||||||
| @@ -306,14 +327,9 @@ fn resolve_plane_sweep_candidates( | |||||||
| ) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>> | ) -> anyhow::Result<BTreeMap<u8, RoaringBitmap>> | ||||||
| { | { | ||||||
|     /// FIXME may be buggy with query like "new new york" |     /// FIXME may be buggy with query like "new new york" | ||||||
|     fn plane_sweep<'a>( |     fn plane_sweep( | ||||||
|         ctx: &dyn Context, |         groups_positions: Vec<Vec<(Position, u8, Position)>>, | ||||||
|         operations: &'a [Operation], |  | ||||||
|         docid: DocumentId, |  | ||||||
|         consecutive: bool, |         consecutive: bool, | ||||||
|         rocache: &mut HashMap<&'a Operation, Vec<(Position, u8, Position)>>, |  | ||||||
|         words_positions: &HashMap<String, RoaringBitmap>, |  | ||||||
|         wdcache: &mut WordDerivationsCache, |  | ||||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> |     ) -> anyhow::Result<Vec<(Position, u8, Position)>> | ||||||
|     { |     { | ||||||
|         fn compute_groups_proximity( |         fn compute_groups_proximity( | ||||||
| @@ -362,13 +378,9 @@ fn resolve_plane_sweep_candidates( | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let groups_len = operations.len(); |         let groups_len = groups_positions.len(); | ||||||
|         let mut groups_positions = Vec::with_capacity(groups_len); |  | ||||||
|  |  | ||||||
|         for operation in operations { |         let mut groups_positions: Vec<_> = groups_positions.into_iter().map(|pos| pos.into_iter()).collect(); | ||||||
|             let positions = resolve_operation(ctx, operation, docid, rocache, words_positions, wdcache)?; |  | ||||||
|             groups_positions.push(positions.into_iter()); |  | ||||||
|         } |  | ||||||
|  |  | ||||||
|         // Pop top elements of each list. |         // Pop top elements of each list. | ||||||
|         let mut current = Vec::with_capacity(groups_len); |         let mut current = Vec::with_capacity(groups_len); | ||||||
| @@ -441,15 +453,32 @@ fn resolve_plane_sweep_candidates( | |||||||
|         wdcache: &mut WordDerivationsCache, |         wdcache: &mut WordDerivationsCache, | ||||||
|     ) -> anyhow::Result<Vec<(Position, u8, Position)>> |     ) -> anyhow::Result<Vec<(Position, u8, Position)>> | ||||||
|     { |     { | ||||||
|         use Operation::{And, Consecutive, Or}; |         use Operation::{And, Phrase, Or}; | ||||||
|  |  | ||||||
|         if let Some(result) = rocache.get(query_tree) { |         if let Some(result) = rocache.get(query_tree) { | ||||||
|             return Ok(result.clone()); |             return Ok(result.clone()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         let result = match query_tree { |         let result = match query_tree { | ||||||
|             And(ops) => plane_sweep(ctx, ops, docid, false, rocache, words_positions, wdcache)?, |             And(ops) => { | ||||||
|             Consecutive(ops) => plane_sweep(ctx, ops, docid, true, rocache, words_positions, wdcache)?, |                  let mut groups_positions = Vec::with_capacity(ops.len()); | ||||||
|  |                 for operation in ops { | ||||||
|  |                     let positions = resolve_operation(ctx, operation, docid, rocache, words_positions, wdcache)?; | ||||||
|  |                     groups_positions.push(positions); | ||||||
|  |                 } | ||||||
|  |                 plane_sweep(groups_positions, false)? | ||||||
|  |             }, | ||||||
|  |             Phrase(words) => { | ||||||
|  |                 let mut groups_positions = Vec::with_capacity(words.len()); | ||||||
|  |                 for word in words { | ||||||
|  |                     let positions = match words_positions.get(word) { | ||||||
|  |                         Some(positions) => positions.iter().map(|p| (p, 0, p)).collect(), | ||||||
|  |                         None => vec![], | ||||||
|  |                     }; | ||||||
|  |                     groups_positions.push(positions); | ||||||
|  |                 } | ||||||
|  |                 plane_sweep(groups_positions, true)? | ||||||
|  |             }, | ||||||
|             Or(_, ops) => { |             Or(_, ops) => { | ||||||
|                 let mut result = Vec::new(); |                 let mut result = Vec::new(); | ||||||
|                 for op in ops { |                 for op in ops { | ||||||
|   | |||||||
| @@ -1,6 +1,5 @@ | |||||||
| use std::{borrow::Cow, collections::HashMap, mem::take}; | use std::{borrow::Cow, collections::HashMap, mem::take}; | ||||||
|  |  | ||||||
| use anyhow::bail; |  | ||||||
| use log::debug; | use log::debug; | ||||||
| use roaring::RoaringBitmap; | use roaring::RoaringBitmap; | ||||||
|  |  | ||||||
| @@ -13,7 +12,6 @@ use super::{ | |||||||
|     CriterionParameters, |     CriterionParameters, | ||||||
|     CriterionResult, |     CriterionResult, | ||||||
|     query_docids, |     query_docids, | ||||||
|     query_pair_proximity_docids, |  | ||||||
|     resolve_query_tree, |     resolve_query_tree, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| @@ -174,12 +172,14 @@ fn alterate_query_tree( | |||||||
|         wdcache: &mut WordDerivationsCache, |         wdcache: &mut WordDerivationsCache, | ||||||
|     ) -> anyhow::Result<()> |     ) -> anyhow::Result<()> | ||||||
|     { |     { | ||||||
|         use Operation::{And, Consecutive, Or}; |         use Operation::{And, Phrase, Or}; | ||||||
|  |  | ||||||
|         match operation { |         match operation { | ||||||
|             And(ops) | Consecutive(ops) | Or(_, ops) => { |             And(ops) | Or(_, ops) => { | ||||||
|                 ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache)) |                 ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache)) | ||||||
|             }, |             }, | ||||||
|  |             // Because Phrases don't allow typos, no alteration can be done. | ||||||
|  |             Phrase(_words) => return Ok(()), | ||||||
|             Operation::Query(q) => { |             Operation::Query(q) => { | ||||||
|                 if let QueryKind::Tolerant { typo, word } = &q.kind { |                 if let QueryKind::Tolerant { typo, word } = &q.kind { | ||||||
|                     // if no typo is allowed we don't call word_derivations function, |                     // if no typo is allowed we don't call word_derivations function, | ||||||
| @@ -228,32 +228,29 @@ fn resolve_candidates<'t>( | |||||||
|         wdcache: &mut WordDerivationsCache, |         wdcache: &mut WordDerivationsCache, | ||||||
|     ) -> anyhow::Result<RoaringBitmap> |     ) -> anyhow::Result<RoaringBitmap> | ||||||
|     { |     { | ||||||
|         use Operation::{And, Consecutive, Or, Query}; |         use Operation::{And, Phrase, Or, Query}; | ||||||
|  |  | ||||||
|         match query_tree { |         match query_tree { | ||||||
|             And(ops) => { |             And(ops) => { | ||||||
|                 mdfs(ctx, ops, number_typos, cache, wdcache) |                 mdfs(ctx, ops, number_typos, cache, wdcache) | ||||||
|             }, |             }, | ||||||
|             Consecutive(ops) => { |             Phrase(words) => { | ||||||
|                 let mut candidates = RoaringBitmap::new(); |                 let mut candidates = RoaringBitmap::new(); | ||||||
|                 let mut first_loop = true; |                 let mut first_loop = true; | ||||||
|                 for slice in ops.windows(2) { |                 for slice in words.windows(2) { | ||||||
|                     match (&slice[0], &slice[1]) { |                     let (left, right) = (&slice[0], &slice[1]); | ||||||
|                         (Operation::Query(left), Operation::Query(right)) => { |                     match ctx.word_pair_proximity_docids(left, right, 1)? { | ||||||
|                             match query_pair_proximity_docids(ctx, left, right, 1, wdcache)? { |                         Some(pair_docids) => { | ||||||
|                                 pair_docids if pair_docids.is_empty() => { |                             if pair_docids.is_empty() { | ||||||
|                                     return Ok(RoaringBitmap::new()) |                                 return Ok(RoaringBitmap::new()); | ||||||
|                                 }, |                             } else if first_loop { | ||||||
|                                 pair_docids if first_loop => { |                                 candidates = pair_docids; | ||||||
|                                     candidates = pair_docids; |                                 first_loop = false; | ||||||
|                                     first_loop = false; |                             } else { | ||||||
|                                 }, |                                 candidates &= pair_docids; | ||||||
|                                 pair_docids => { |  | ||||||
|                                     candidates.intersect_with(&pair_docids); |  | ||||||
|                                 }, |  | ||||||
|                             } |                             } | ||||||
|                         }, |                         }, | ||||||
|                         _ => bail!("invalid consecutive query type"), |                         None => return Ok(RoaringBitmap::new()) | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 Ok(candidates) |                 Ok(candidates) | ||||||
|   | |||||||
| @@ -52,13 +52,18 @@ impl MatchingWords { | |||||||
| fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> { | fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> { | ||||||
|     fn resolve_ops<'a>(tree: &'a Operation, out: &mut HashSet<(&'a str, u8, IsPrefix)>) { |     fn resolve_ops<'a>(tree: &'a Operation, out: &mut HashSet<(&'a str, u8, IsPrefix)>) { | ||||||
|         match tree { |         match tree { | ||||||
|             Operation::Or(_, ops) | Operation::And(ops) | Operation::Consecutive(ops) => { |             Operation::Or(_, ops) | Operation::And(ops) => { | ||||||
|                 ops.as_slice().iter().for_each(|op| resolve_ops(op, out)); |                 ops.as_slice().iter().for_each(|op| resolve_ops(op, out)); | ||||||
|             }, |             }, | ||||||
|             Operation::Query(Query { prefix, kind }) => { |             Operation::Query(Query { prefix, kind }) => { | ||||||
|                 let typo = if kind.is_exact() { 0 } else { kind.typo() }; |                 let typo = if kind.is_exact() { 0 } else { kind.typo() }; | ||||||
|                 out.insert((kind.word(), typo, *prefix)); |                 out.insert((kind.word(), typo, *prefix)); | ||||||
|             }, |             }, | ||||||
|  |             Operation::Phrase(words) => { | ||||||
|  |                 for word in words { | ||||||
|  |                     out.insert((word, 0, false)); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -15,7 +15,8 @@ type IsPrefix = bool; | |||||||
| #[derive(Clone, PartialEq, Eq, Hash)] | #[derive(Clone, PartialEq, Eq, Hash)] | ||||||
| pub enum Operation { | pub enum Operation { | ||||||
|     And(Vec<Operation>), |     And(Vec<Operation>), | ||||||
|     Consecutive(Vec<Operation>), |     // serie of consecutive non prefix and exact words | ||||||
|  |     Phrase(Vec<String>), | ||||||
|     Or(IsOptionalWord, Vec<Operation>), |     Or(IsOptionalWord, Vec<Operation>), | ||||||
|     Query(Query), |     Query(Query), | ||||||
| } | } | ||||||
| @@ -28,9 +29,8 @@ impl fmt::Debug for Operation { | |||||||
|                     writeln!(f, "{:1$}AND", "", depth * 2)?; |                     writeln!(f, "{:1$}AND", "", depth * 2)?; | ||||||
|                     children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1)) |                     children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1)) | ||||||
|                 }, |                 }, | ||||||
|                 Operation::Consecutive(children) => { |                 Operation::Phrase(children) => { | ||||||
|                     writeln!(f, "{:1$}CONSECUTIVE", "", depth * 2)?; |                     writeln!(f, "{:2$}PHRASE {:?}", "", children, depth * 2) | ||||||
|                     children.iter().try_for_each(|c| pprint_tree(f, c, depth + 1)) |  | ||||||
|                 }, |                 }, | ||||||
|                 Operation::Or(true, children) => { |                 Operation::Or(true, children) => { | ||||||
|                     writeln!(f, "{:1$}OR(WORD)", "", depth * 2)?; |                     writeln!(f, "{:1$}OR(WORD)", "", depth * 2)?; | ||||||
| @@ -49,14 +49,6 @@ impl fmt::Debug for Operation { | |||||||
| } | } | ||||||
|  |  | ||||||
| impl Operation { | impl Operation { | ||||||
|     fn phrase(words: Vec<String>) -> Operation { |  | ||||||
|         Operation::consecutive( |  | ||||||
|             words.into_iter().map(|s| { |  | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact(s) }) |  | ||||||
|             }).collect() |  | ||||||
|         ) |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     fn and(mut ops: Vec<Self>) -> Self { |     fn and(mut ops: Vec<Self>) -> Self { | ||||||
|         if ops.len() == 1 { |         if ops.len() == 1 { | ||||||
|             ops.pop().unwrap() |             ops.pop().unwrap() | ||||||
| @@ -73,11 +65,11 @@ impl Operation { | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     fn consecutive(mut ops: Vec<Self>) -> Self { |     fn phrase(mut words: Vec<String>) -> Self { | ||||||
|         if ops.len() == 1 { |         if words.len() == 1 { | ||||||
|             ops.pop().unwrap() |             Self::Query(Query { prefix: false, kind: QueryKind::exact(words.pop().unwrap()) }) | ||||||
|         } else { |         } else { | ||||||
|             Self::Consecutive(ops) |             Self::Phrase(words) | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
| @@ -256,10 +248,10 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O | |||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     Ok(best.map(|(_, left, right)| Operation::Consecutive( |     Ok(best.map(|(_, left, right)| Operation::Phrase( | ||||||
|         vec![ |         vec![ | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact(left.to_string()) }), |             left.to_string(), | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact(right.to_string()) }) |             right.to_string() | ||||||
|         ] |         ] | ||||||
|     ))) |     ))) | ||||||
| } | } | ||||||
| @@ -494,24 +486,26 @@ fn create_primitive_query(query: TokenStream, stop_words: Option<Set<&[u8]>>, wo | |||||||
|  |  | ||||||
| /// Returns the maximum number of typos that this Operation allows. | /// Returns the maximum number of typos that this Operation allows. | ||||||
| pub fn maximum_typo(operation: &Operation) -> usize { | pub fn maximum_typo(operation: &Operation) -> usize { | ||||||
|     use Operation::{Or, And, Query, Consecutive}; |     use Operation::{Or, And, Query, Phrase}; | ||||||
|     match operation { |     match operation { | ||||||
|         Or(_, ops) => ops.iter().map(maximum_typo).max().unwrap_or(0), |         Or(_, ops) => ops.iter().map(maximum_typo).max().unwrap_or(0), | ||||||
|         And(ops) | Consecutive(ops) => ops.iter().map(maximum_typo).sum::<usize>(), |         And(ops) => ops.iter().map(maximum_typo).sum::<usize>(), | ||||||
|         Query(q) => q.kind.typo() as usize, |         Query(q) => q.kind.typo() as usize, | ||||||
|  |         // no typo allowed in phrases | ||||||
|  |         Phrase(_) => 0, | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Returns the maximum proximity that this Operation allows. | /// Returns the maximum proximity that this Operation allows. | ||||||
| pub fn maximum_proximity(operation: &Operation) -> usize { | pub fn maximum_proximity(operation: &Operation) -> usize { | ||||||
|     use Operation::{Or, And, Query, Consecutive}; |     use Operation::{Or, And, Query, Phrase}; | ||||||
|     match operation { |     match operation { | ||||||
|         Or(_, ops) => ops.iter().map(maximum_proximity).max().unwrap_or(0), |         Or(_, ops) => ops.iter().map(maximum_proximity).max().unwrap_or(0), | ||||||
|         And(ops) => { |         And(ops) => { | ||||||
|             ops.iter().map(maximum_proximity).sum::<usize>() |             ops.iter().map(maximum_proximity).sum::<usize>() | ||||||
|             + ops.len().saturating_sub(1) * 7 |             + ops.len().saturating_sub(1) * 7 | ||||||
|         }, |         }, | ||||||
|         Query(_) | Consecutive(_) => 0, |         Query(_) | Phrase(_) => 0, | ||||||
|     } |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -765,9 +759,9 @@ mod test { | |||||||
|         let expected = Operation::Or(false, vec![ |         let expected = Operation::Or(false, vec![ | ||||||
|             Operation::And(vec![ |             Operation::And(vec![ | ||||||
|                 Operation::Or(false, vec![ |                 Operation::Or(false, vec![ | ||||||
|                     Operation::Consecutive(vec![ |                     Operation::Phrase(vec![ | ||||||
|                         Operation::Query(Query { prefix: false, kind: QueryKind::exact("word".to_string()) }), |                         "word".to_string(), | ||||||
|                         Operation::Query(Query { prefix: false, kind: QueryKind::exact("split".to_string()) }), |                         "split".to_string(), | ||||||
|                     ]), |                     ]), | ||||||
|                     Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplit".to_string()) }), |                     Operation::Query(Query { prefix: false, kind: QueryKind::tolerant(2, "wordsplit".to_string()) }), | ||||||
|                 ]), |                 ]), | ||||||
| @@ -789,9 +783,9 @@ mod test { | |||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
|         let expected = Operation::And(vec![ |         let expected = Operation::And(vec![ | ||||||
|             Operation::Consecutive(vec![ |             Operation::Phrase(vec![ | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), |                 "hey".to_string(), | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }), |                 "friends".to_string(), | ||||||
|             ]), |             ]), | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
| @@ -809,13 +803,13 @@ mod test { | |||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
|         let expected = Operation::And(vec![ |         let expected = Operation::And(vec![ | ||||||
|             Operation::Consecutive(vec![ |             Operation::Phrase(vec![ | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), |                 "hey".to_string(), | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }), |                 "friends".to_string(), | ||||||
|             ]), |             ]), | ||||||
|             Operation::Consecutive(vec![ |             Operation::Phrase(vec![ | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), |                 "wooop".to_string(), | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), |                 "wooop".to_string(), | ||||||
|             ]), |             ]), | ||||||
|         ]); |         ]); | ||||||
|  |  | ||||||
| @@ -870,9 +864,9 @@ mod test { | |||||||
|         let result = analyzer.analyze(query); |         let result = analyzer.analyze(query); | ||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
|         let expected = Operation::Consecutive(vec![ |         let expected = Operation::Phrase(vec![ | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), |             "hey".to_string(), | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }), |             "my".to_string(), | ||||||
|         ]); |         ]); | ||||||
|         let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); |         let (query_tree, _) = TestContext::default().build(true, true, None, tokens).unwrap().unwrap(); | ||||||
|  |  | ||||||
| @@ -940,9 +934,9 @@ mod test { | |||||||
|         let tokens = result.tokens(); |         let tokens = result.tokens(); | ||||||
|  |  | ||||||
|         let expected = Operation::And(vec![ |         let expected = Operation::And(vec![ | ||||||
|             Operation::Consecutive(vec![ |             Operation::Phrase(vec![ | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), |                 "hey".to_string(), | ||||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("my".to_string()) }), |                 "my".to_string(), | ||||||
|             ]), |             ]), | ||||||
|             Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }), |             Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }), | ||||||
|         ]); |         ]); | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user