mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 18:04:47 +00:00
Avoid creating a MatchingWord for words that exceed the length limit
This commit is contained in:
@ -8,6 +8,7 @@ use charabia::Token;
|
||||
use levenshtein_automata::{Distance, DFA};
|
||||
|
||||
use crate::search::build_dfa;
|
||||
use crate::MAX_WORD_LENGTH;
|
||||
|
||||
type IsPrefix = bool;
|
||||
|
||||
@ -18,6 +19,17 @@ pub struct MatchingWords {
|
||||
inner: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>,
|
||||
}
|
||||
|
||||
impl fmt::Debug for MatchingWords {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
writeln!(f, "[")?;
|
||||
for (matching_words, primitive_word_id) in self.inner.iter() {
|
||||
writeln!(f, "({matching_words:?}, {primitive_word_id:?})")?;
|
||||
}
|
||||
writeln!(f, "]")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl MatchingWords {
|
||||
pub fn new(mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>) -> Self {
|
||||
// Sort words by len in DESC order, prioritizing the longest matches,
|
||||
@ -93,10 +105,13 @@ impl PartialEq for MatchingWord {
|
||||
}
|
||||
|
||||
impl MatchingWord {
|
||||
pub fn new(word: String, typo: u8, prefix: IsPrefix) -> Self {
|
||||
pub fn new(word: String, typo: u8, prefix: IsPrefix) -> Option<Self> {
|
||||
if word.len() > MAX_WORD_LENGTH {
|
||||
return None;
|
||||
}
|
||||
let dfa = build_dfa(&word, typo, prefix);
|
||||
|
||||
Self { dfa, word, typo, prefix }
|
||||
Some(Self { dfa, word, typo, prefix })
|
||||
}
|
||||
|
||||
/// Returns the length in chars of the match if the token matches the term.
|
||||
@ -335,9 +350,9 @@ mod tests {
|
||||
#[test]
|
||||
fn matching_words() {
|
||||
let all = vec![
|
||||
Rc::new(MatchingWord::new("split".to_string(), 1, true)),
|
||||
Rc::new(MatchingWord::new("this".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("world".to_string(), 1, true)),
|
||||
Rc::new(MatchingWord::new("split".to_string(), 1, true).unwrap()),
|
||||
Rc::new(MatchingWord::new("this".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("world".to_string(), 1, true).unwrap()),
|
||||
];
|
||||
let matching_words = vec![
|
||||
(vec![all[0].clone()], vec![0]),
|
||||
|
@ -503,9 +503,9 @@ mod tests {
|
||||
|
||||
fn matching_words() -> MatchingWords {
|
||||
let all = vec![
|
||||
Rc::new(MatchingWord::new("split".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("the".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("world".to_string(), 1, true)),
|
||||
Rc::new(MatchingWord::new("split".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("world".to_string(), 1, true).unwrap()),
|
||||
];
|
||||
let matching_words = vec![
|
||||
(vec![all[0].clone()], vec![0]),
|
||||
@ -595,8 +595,8 @@ mod tests {
|
||||
#[test]
|
||||
fn highlight_unicode() {
|
||||
let all = vec![
|
||||
Rc::new(MatchingWord::new("wessfali".to_string(), 1, true)),
|
||||
Rc::new(MatchingWord::new("world".to_string(), 1, true)),
|
||||
Rc::new(MatchingWord::new("wessfali".to_string(), 1, true).unwrap()),
|
||||
Rc::new(MatchingWord::new("world".to_string(), 1, true).unwrap()),
|
||||
];
|
||||
let matching_words = vec![(vec![all[0].clone()], vec![0]), (vec![all[1].clone()], vec![1])];
|
||||
|
||||
@ -832,12 +832,12 @@ mod tests {
|
||||
#[test]
|
||||
fn partial_matches() {
|
||||
let all = vec![
|
||||
Rc::new(MatchingWord::new("the".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("t".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("he".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("door".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("do".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("or".to_string(), 0, false)),
|
||||
Rc::new(MatchingWord::new("the".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("t".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("he".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("door".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("do".to_string(), 0, false).unwrap()),
|
||||
Rc::new(MatchingWord::new("or".to_string(), 0, false).unwrap()),
|
||||
];
|
||||
let matching_words = vec![
|
||||
(vec![all[0].clone()], vec![0]),
|
||||
|
Reference in New Issue
Block a user