mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Create formatter with some tests
This commit is contained in:
		| @@ -1,11 +1,11 @@ | ||||
| use std::cmp::{min, Reverse}; | ||||
| use std::collections::{BTreeMap, HashSet}; | ||||
| use std::collections::{BTreeMap, HashMap}; | ||||
| use std::ops::{Index, IndexMut}; | ||||
| 
 | ||||
| use levenshtein_automata::{Distance, DFA}; | ||||
| use meilisearch_tokenizer::Token; | ||||
| 
 | ||||
| use super::build_dfa; | ||||
| use crate::search::build_dfa; | ||||
| use crate::search::query_tree::{Operation, Query}; | ||||
| 
 | ||||
| type IsPrefix = bool; | ||||
| @@ -14,7 +14,7 @@ type IsPrefix = bool; | ||||
| /// referencing words that match the given query tree.
 | ||||
| #[derive(Default)] | ||||
| pub struct MatchingWords { | ||||
|     dfas: Vec<(DFA, String, u8, IsPrefix)>, | ||||
|     dfas: Vec<(DFA, String, u8, IsPrefix, usize)>, | ||||
| } | ||||
| 
 | ||||
| impl MatchingWords { | ||||
| @@ -23,11 +23,11 @@ impl MatchingWords { | ||||
|         let mut dfas: Vec<_> = fetch_queries(tree) | ||||
|             .into_iter() | ||||
|             // create DFAs for each word
 | ||||
|             .map(|(w, t, p)| (build_dfa(w, t, p), w.to_string(), t, p)) | ||||
|             .map(|((w, t, p), id)| (build_dfa(w, t, p), w.to_string(), t, p, id)) | ||||
|             .collect(); | ||||
|         // Sort word by len in DESC order prioritizing the longuest word,
 | ||||
|         // in order to highlight the longuest part of the matched word.
 | ||||
|         dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix)| { | ||||
|         dfas.sort_unstable_by_key(|(_dfa, query_word, _typo, _is_prefix, _id)| { | ||||
|             Reverse(query_word.len()) | ||||
|         }); | ||||
|         Self { dfas } | ||||
| @@ -35,14 +35,21 @@ impl MatchingWords { | ||||
| 
 | ||||
|     /// Returns the number of matching bytes if the word matches one of the query words.
 | ||||
|     pub fn matching_bytes(&self, word_to_highlight: &Token) -> Option<usize> { | ||||
|         self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix)| { | ||||
|         self.matching_bytes_with_id(word_to_highlight).map(|(len, _)| len) | ||||
|     } | ||||
| 
 | ||||
|     pub fn matching_bytes_with_id(&self, word_to_highlight: &Token) -> Option<(usize, usize)> { | ||||
|         self.dfas.iter().find_map(|(dfa, query_word, typo, is_prefix, id)| { | ||||
|             match dfa.eval(word_to_highlight.text()) { | ||||
|                 Distance::Exact(t) if t <= *typo => { | ||||
|                     if *is_prefix { | ||||
|                         let len = bytes_to_highlight(word_to_highlight.text(), query_word); | ||||
|                         Some(word_to_highlight.num_chars_from_bytes(len)) | ||||
|                         Some((word_to_highlight.num_chars_from_bytes(len), *id)) | ||||
|                     } else { | ||||
|                         Some(word_to_highlight.num_chars_from_bytes(word_to_highlight.text().len())) | ||||
|                         Some(( | ||||
|                             word_to_highlight.num_chars_from_bytes(word_to_highlight.text().len()), | ||||
|                             *id, | ||||
|                         )) | ||||
|                     } | ||||
|                 } | ||||
|                 _otherwise => None, | ||||
| @@ -52,26 +59,37 @@ impl MatchingWords { | ||||
| } | ||||
| 
 | ||||
| /// Lists all words which can be considered as a match for the query tree.
 | ||||
| fn fetch_queries(tree: &Operation) -> HashSet<(&str, u8, IsPrefix)> { | ||||
|     fn resolve_ops<'a>(tree: &'a Operation, out: &mut HashSet<(&'a str, u8, IsPrefix)>) { | ||||
| fn fetch_queries(tree: &Operation) -> HashMap<(&str, u8, IsPrefix), usize> { | ||||
|     fn resolve_ops<'a>( | ||||
|         tree: &'a Operation, | ||||
|         out: &mut HashMap<(&'a str, u8, IsPrefix), usize>, | ||||
|         id: &mut usize, | ||||
|     ) { | ||||
|         match tree { | ||||
|             Operation::Or(_, ops) | Operation::And(ops) => { | ||||
|                 ops.as_slice().iter().for_each(|op| resolve_ops(op, out)); | ||||
|                 ops.as_slice().iter().for_each(|op| resolve_ops(op, out, id)); | ||||
|             } | ||||
|             Operation::Query(Query { prefix, kind }) => { | ||||
|                 let typo = if kind.is_exact() { 0 } else { kind.typo() }; | ||||
|                 out.insert((kind.word(), typo, *prefix)); | ||||
|                 out.entry((kind.word(), typo, *prefix)).or_insert_with(|| { | ||||
|                     *id += 1; | ||||
|                     *id | ||||
|                 }); | ||||
|             } | ||||
|             Operation::Phrase(words) => { | ||||
|                 for word in words { | ||||
|                     out.insert((word, 0, false)); | ||||
|                     out.entry((word, 0, false)).or_insert_with(|| { | ||||
|                         *id += 1; | ||||
|                         *id | ||||
|                     }); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     let mut queries = HashSet::new(); | ||||
|     resolve_ops(tree, &mut queries); | ||||
|     let mut queries = HashMap::new(); | ||||
|     let mut id = 0; | ||||
|     resolve_ops(tree, &mut queries, &mut id); | ||||
|     queries | ||||
| } | ||||
| 
 | ||||
							
								
								
									
										434
									
								
								milli/src/search/matches/mod.rs
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										434
									
								
								milli/src/search/matches/mod.rs
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,434 @@ | ||||
| use std::borrow::Cow; | ||||
|  | ||||
| use matching_words::MatchingWords; | ||||
| use meilisearch_tokenizer::token::SeparatorKind; | ||||
| use meilisearch_tokenizer::{Analyzer, AnalyzerConfig, Token}; | ||||
|  | ||||
| use crate::search::query_tree::Operation; | ||||
|  | ||||
| pub mod matching_words; | ||||
|  | ||||
/// Default number of words kept in a cropped snippet.
const DEFAULT_CROP_SIZE: usize = 10;
/// Marker inserted where the text has been cropped.
// `'static` is implied on `&str` consts; spelling it out trips clippy's
// `redundant_static_lifetimes` lint, so it is dropped here.
const DEFAULT_CROP_MARKER: &str = "…";
/// Markers wrapped around each highlighted match.
const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
|  | ||||
/// Builder for [`Matcher`]: holds the query words and the formatting options.
pub struct MatcherBuilder {
    // Query words extracted from the query tree, used to locate matches.
    matching_words: MatchingWords,
    // Number of words kept in a cropped snippet.
    crop_size: usize,
    // Overrides for the `DEFAULT_*` markers; `None` means "use the default".
    crop_marker: Option<String>,
    highlight_prefix: Option<String>,
    highlight_suffix: Option<String>,
}
|  | ||||
| impl MatcherBuilder { | ||||
|     pub fn from_query_tree(query_tree: &Operation) -> Self { | ||||
|         let matching_words = MatchingWords::from_query_tree(query_tree); | ||||
|  | ||||
|         Self { | ||||
|             matching_words, | ||||
|             crop_size: DEFAULT_CROP_SIZE, | ||||
|             crop_marker: None, | ||||
|             highlight_prefix: None, | ||||
|             highlight_suffix: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     pub fn crop_size(&mut self, word_count: usize) -> &Self { | ||||
|         self.crop_size = word_count; | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn crop_marker(&mut self, marker: String) -> &Self { | ||||
|         self.crop_marker = Some(marker); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn highlight_prefix(&mut self, prefix: String) -> &Self { | ||||
|         self.highlight_prefix = Some(prefix); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn highlight_suffix(&mut self, suffix: String) -> &Self { | ||||
|         self.highlight_suffix = Some(suffix); | ||||
|         self | ||||
|     } | ||||
|  | ||||
|     pub fn build<'t, 'm>(&'m self, tokens: &'t [Token], text: &'t str) -> Matcher<'t, 'm> { | ||||
|         let crop_marker = match &self.crop_marker { | ||||
|             Some(marker) => marker.as_str(), | ||||
|             None => &DEFAULT_CROP_MARKER, | ||||
|         }; | ||||
|  | ||||
|         let highlight_prefix = match &self.highlight_prefix { | ||||
|             Some(marker) => marker.as_str(), | ||||
|             None => &DEFAULT_HIGHLIGHT_PREFIX, | ||||
|         }; | ||||
|         let highlight_suffix = match &self.highlight_suffix { | ||||
|             Some(marker) => marker.as_str(), | ||||
|             None => &DEFAULT_HIGHLIGHT_SUFFIX, | ||||
|         }; | ||||
|         Matcher { | ||||
|             text, | ||||
|             tokens, | ||||
|             matching_words: &self.matching_words, | ||||
|             crop_size: self.crop_size, | ||||
|             crop_marker, | ||||
|             highlight_prefix, | ||||
|             highlight_suffix, | ||||
|             matches: None, | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // impl Default for MatcherBuilder { | ||||
| //     fn default() -> Self { | ||||
| //         Self { | ||||
| //             crop_size: DEFAULT_CROP_SIZE, | ||||
| //             crop_marker: None, | ||||
| //             highlight_prefix: None, | ||||
| //             highlight_suffix: None, | ||||
| //         } | ||||
| //     } | ||||
| // } | ||||
|  | ||||
/// A single query-word match found in the analyzed text.
pub struct Match<'t> {
    // The token of the text that matched a query word.
    token: &'t Token<'t>,
    // Length of the matched part of the token.
    // NOTE(review): this value comes from `matching_bytes_with_id`, which
    // despite its name returns a count produced by `num_chars_from_bytes` —
    // confirm whether the intended unit is bytes or characters.
    match_len: usize,
    // id of the query word that matches.
    id: usize,
    // position of the word in the whole text.
    position: usize,
}
|  | ||||
/// Location of a match, as returned to callers of [`Matcher::matches`].
pub struct MatchBounds {
    // Byte offset of the match in the original text (from `Token::byte_start`).
    start: usize,
    // Length of the matched part (copied from `Match::match_len`).
    length: usize,
}
|  | ||||
| impl<'t> From<&Match<'t>> for MatchBounds { | ||||
|     fn from(m: &Match) -> Self { | ||||
|         MatchBounds { start: m.token.byte_start, length: m.match_len } | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Formats one text: finds the query-word matches in it and can highlight
/// and/or crop it around them. Built by [`MatcherBuilder::build`].
pub struct Matcher<'t, 'm> {
    // The original text being formatted.
    text: &'t str,
    // Tokens of `text`, produced by the analyzer.
    tokens: &'t [Token<'t>],
    // Query words to look for, borrowed from the builder.
    matching_words: &'m MatchingWords,
    // Number of words kept when cropping.
    crop_size: usize,
    crop_marker: &'m str,
    highlight_prefix: &'m str,
    highlight_suffix: &'m str,
    // Lazily filled by `compute_matches`; `None` until first needed.
    matches: Option<Vec<Match<'t>>>,
}
|  | ||||
impl<'t> Matcher<'t, '_> {
    /// Scans every token of the text against the query words and caches the
    /// resulting matches in `self.matches`.
    fn compute_matches(&mut self) -> &mut Self {
        let mut matches = Vec::new();
        let mut position = 0;
        for token in self.tokens {
            match token.is_separator() {
                // A hard separator widens the word-position gap.
                // NOTE(review): the weight 7 is a magic number — presumably it
                // mirrors a proximity distance used elsewhere in the crate;
                // confirm and name it as a constant.
                Some(SeparatorKind::Hard) => position += 7,
                // Not a separator: a real word token — try to match it.
                None => {
                    if let Some((match_len, id)) =
                        self.matching_words.matching_bytes_with_id(&token)
                    {
                        matches.push(Match { token, match_len, id, position });
                    }
                    position += 1;
                }
                // Soft separators do not advance the word position.
                _otherwise => {}
            }
        }

        self.matches = Some(matches);
        self
    }

    /// Returns the bounds of every match, computing them on first call.
    pub fn matches(&mut self) -> Vec<MatchBounds> {
        match &self.matches {
            // Not computed yet: compute, then recurse into the `Some` arm.
            None => self.compute_matches().matches(),
            Some(matches) => matches.iter().map(MatchBounds::from).collect(),
        }
    }

    /// Returns the `(byte_start, byte_end)` window of the cropped snippet.
    // NOTE(review): the `matches` parameter is currently unused — the window
    // always starts at the beginning of the text and spans the first
    // `crop_size` words. The tests below expect the window to move so that it
    // covers the matches; this implementation looks unfinished.
    fn crop_bounds(&self, matches: &[Match<'t>]) -> (usize, usize) {
        let byte_end = self
            .tokens
            .iter()
            // Count only word tokens toward `crop_size`.
            .filter(|t| t.is_separator().is_none())
            .enumerate()
            .take_while(|(i, _)| *i < self.crop_size)
            .last()
            // Fewer than `crop_size` words (or none): keep the whole text.
            .map_or(self.text.len(), |(_, t)| t.byte_end);

        (0, byte_end)
    }

    /// Returns the formatted text: optionally cropped around the matches and
    /// optionally with each match wrapped in the highlight markers.
    /// Borrows the original text when no rewriting is needed.
    pub fn format(&mut self, highlight: bool, crop: bool) -> Cow<'t, str> {
        if !highlight && !crop {
            // compute matches is not needed if no highlight or crop is requested.
            Cow::Borrowed(self.text)
        } else {
            match &self.matches {
                Some(matches) => {
                    let (byte_start, byte_end) =
                        if crop { self.crop_bounds(matches) } else { (0, self.text.len()) };

                    // Slices of the output, concatenated once at the end.
                    let mut formatted = Vec::new();

                    // push crop marker if it's not the start of the text.
                    if byte_start > 0 && !self.crop_marker.is_empty() {
                        formatted.push(self.crop_marker);
                    }

                    let mut byte_index = byte_start;

                    if highlight {
                        // insert highlight markers around matches.
                        // NOTE(review): a match whose token *starts* before
                        // `byte_end` but *ends* after it is still pushed whole,
                        // extending the snippet past the crop bound — confirm
                        // whether that is intended.
                        for m in matches
                            .iter()
                            .skip_while(|m| m.token.byte_start < byte_start)
                            .take_while(|m| m.token.byte_start < byte_end)
                        {
                            // Plain text between the previous match and this one.
                            if byte_index < m.token.byte_start {
                                formatted.push(&self.text[byte_index..m.token.byte_start]);
                            }

                            formatted.push(self.highlight_prefix);
                            formatted.push(&self.text[m.token.byte_start..m.token.byte_end]);
                            formatted.push(self.highlight_suffix);

                            byte_index = m.token.byte_end;
                        }
                    }

                    // push the rest of the text between last match and the end of crop.
                    if byte_index < byte_end {
                        formatted.push(&self.text[byte_index..byte_end]);
                    }

                    // push crop marker if it's not the end of the text.
                    if byte_end < self.text.len() && !self.crop_marker.is_empty() {
                        formatted.push(self.crop_marker);
                    }

                    if formatted.len() == 1 {
                        // avoid concatenating if there is already 1 slice.
                        Cow::Borrowed(&self.text[byte_start..byte_end])
                    } else {
                        Cow::Owned(formatted.concat())
                    }
                }
                // Matches not computed yet: compute, then re-enter.
                None => self.compute_matches().format(highlight, crop),
            }
        }
    }
}
|  | ||||
// Tests for the four highlight/crop combinations of `Matcher::format`.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::search::query_tree::{Query, QueryKind};

    /// Query tree for "split the world": exact prefix "split", exact "the",
    /// and "world" as a prefix tolerating one typo.
    fn query_tree() -> Operation {
        Operation::Or(
            false,
            vec![Operation::And(vec![
                Operation::Query(Query {
                    prefix: true,
                    kind: QueryKind::exact("split".to_string()),
                }),
                Operation::Query(Query {
                    prefix: false,
                    kind: QueryKind::exact("the".to_string()),
                }),
                Operation::Query(Query {
                    prefix: true,
                    kind: QueryKind::tolerant(1, "world".to_string()),
                }),
            ])],
        )
    }

    // With highlight and crop both off, `format` must always borrow the text
    // unchanged, whether it contains matches or not.
    #[test]
    fn format_identity() {
        let query_tree = query_tree();

        let builder = MatcherBuilder::from_query_tree(&query_tree);
        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());

        let highlight = false;
        let crop = false;

        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no crop and no highlight should return complete text.
        assert_eq!(&matcher.format(highlight, crop), &text);

        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no crop and no highlight should return complete text.
        assert_eq!(&matcher.format(highlight, crop), &text);

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no crop and no highlight should return complete text.
        assert_eq!(&matcher.format(highlight, crop), &text);
    }

    // Highlight only: every match is wrapped in <em>…</em>, text uncropped.
    #[test]
    fn format_highlight() {
        let query_tree = query_tree();

        let builder = MatcherBuilder::from_query_tree(&query_tree);
        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());

        let highlight = true;
        let crop = false;

        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no crop should return complete text, because there is no matches.
        assert_eq!(&matcher.format(highlight, crop), &text);

        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no crop should return complete text with highlighted matches.
        assert_eq!(&matcher.format(highlight, crop), "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>");

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no crop should return complete text with highlighted matches.
        assert_eq!(
            &matcher.format(highlight, crop),
            "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
        );
    }

    // Crop only.
    // NOTE(review): the "…"-prefixed expectations below assume the crop window
    // moves to cover the matches, but `crop_bounds` currently always crops from
    // the start of the text — these cases look like they cannot pass yet.
    #[test]
    fn format_crop() {
        let query_tree = query_tree();

        let builder = MatcherBuilder::from_query_tree(&query_tree);
        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());

        let highlight = false;
        let crop = true;

        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no highlight should return 10 first words with a marker at the end.
        assert_eq!(
            &matcher.format(highlight, crop),
            "A quick brown fox can not jump 32 feet, right…"
        );

        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no highlight should return 10 last words with a marker at the start.
        assert_eq!(
            &matcher.format(highlight, crop),
            "…she loves. Emily Henry: The Love That Split The World"
        );

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // no highlight should return 10 last words with a marker at the start.
        assert_eq!(
            &matcher.format(highlight, crop),
            "…future to build a world with the boy she loves."
        );

        // Text containing a match unordered and a match ordered.
        let text = "The world split void void void void void void void void void split the world void void";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // crop should return 10 last words with a marker at the start.
        assert_eq!(
            &matcher.format(highlight, crop),
            "…void void void void void split the world void void"
        );
    }

    // Highlight and crop combined.
    #[test]
    fn format_highlight_crop() {
        let query_tree = query_tree();

        let builder = MatcherBuilder::from_query_tree(&query_tree);
        let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());

        let highlight = true;
        let crop = true;

        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // both should return 10 first words with a marker at the end.
        assert_eq!(
            &matcher.format(highlight, crop),
            "A quick brown fox can not jump 32 feet, right…"
        );

        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // both should return 10 last words with a marker at the start and highlighted matches.
        assert_eq!(&matcher.format(highlight, crop), "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>");

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // both should return 10 last words with a marker at the start and highlighted matches.
        assert_eq!(
            &matcher.format(highlight, crop),
            "…future to build a <em>world</em> with <em>the</em> boy she loves."
        );

        // Text containing a match unordered and a match ordered.
        let text = "The world split void void void void void void void void void split the world void void";
        let analyzed = analyzer.analyze(&text);
        let tokens: Vec<_> = analyzed.tokens().collect();
        let mut matcher = builder.build(&tokens[..], text);
        // crop should return 10 last words with a marker at the start.
        assert_eq!(
            &matcher.format(highlight, crop),
            "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
        );
    }
}
| @@ -17,7 +17,7 @@ use roaring::bitmap::RoaringBitmap; | ||||
|  | ||||
| pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; | ||||
| use self::fst_utils::{Complement, Intersection, StartsWith, Union}; | ||||
| pub use self::matching_words::MatchingWords; | ||||
| pub use self::matches::matching_words::MatchingWords; | ||||
| use self::query_tree::QueryTreeBuilder; | ||||
| use crate::error::UserError; | ||||
| use crate::search::criteria::r#final::{Final, FinalResult}; | ||||
| @@ -32,7 +32,7 @@ mod criteria; | ||||
| mod distinct; | ||||
| mod facet; | ||||
| mod fst_utils; | ||||
| mod matching_words; | ||||
| mod matches; | ||||
| mod query_tree; | ||||
|  | ||||
| pub struct Search<'a> { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user