mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-25 21:16:28 +00:00 
			
		
		
		
	Merge #3525
3525: Fix phrase search containing stop words r=ManyTheFish a=ManyTheFish # Summary A search with a phrase containing only stop words was returning an HTTP error 500, this PR filters the phrase containing only stop words dropping them before the search starts, a query with a phrase containing only stop words now behaves like a placeholder search. fixes https://github.com/meilisearch/meilisearch/issues/3521 related v1.0.2 PR on milli: https://github.com/meilisearch/milli/pull/779 Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
		| @@ -59,6 +59,8 @@ pub enum InternalError { | ||||
|     Utf8(#[from] str::Utf8Error), | ||||
|     #[error("An indexation process was explicitly aborted.")] | ||||
|     AbortedIndexation, | ||||
|     #[error("The matching words list contains at least one invalid member.")] | ||||
|     InvalidMatchingWords, | ||||
| } | ||||
|  | ||||
| #[derive(Error, Debug)] | ||||
|   | ||||
| @@ -7,6 +7,7 @@ use std::rc::Rc; | ||||
| use charabia::Token; | ||||
| use levenshtein_automata::{Distance, DFA}; | ||||
|  | ||||
| use crate::error::InternalError; | ||||
| use crate::search::build_dfa; | ||||
| use crate::MAX_WORD_LENGTH; | ||||
|  | ||||
| @@ -31,12 +32,19 @@ impl fmt::Debug for MatchingWords { | ||||
| } | ||||
|  | ||||
| impl MatchingWords { | ||||
|     pub fn new(mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>) -> Self { | ||||
|     pub fn new( | ||||
|         mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>, | ||||
|     ) -> crate::Result<Self> { | ||||
|         // if one of the matching_words vec doesn't contain a word. | ||||
|         if matching_words.iter().any(|(mw, _)| mw.is_empty()) { | ||||
|             return Err(InternalError::InvalidMatchingWords.into()); | ||||
|         } | ||||
|  | ||||
|         // Sort word by len in DESC order prioritizing the longuest matches, | ||||
|         // in order to highlight the longuest part of the matched word. | ||||
|         matching_words.sort_unstable_by_key(|(mw, _)| Reverse((mw.len(), mw[0].word.len()))); | ||||
|  | ||||
|         Self { inner: matching_words } | ||||
|         Ok(Self { inner: matching_words }) | ||||
|     } | ||||
|  | ||||
|     /// Returns an iterator over terms that match or partially match the given token. | ||||
| @@ -360,7 +368,7 @@ mod tests { | ||||
|             (vec![all[2].clone()], vec![2]), | ||||
|         ]; | ||||
|  | ||||
|         let matching_words = MatchingWords::new(matching_words); | ||||
|         let matching_words = MatchingWords::new(matching_words).unwrap(); | ||||
|  | ||||
|         assert_eq!( | ||||
|             matching_words | ||||
|   | ||||
| @@ -513,7 +513,7 @@ mod tests { | ||||
|             (vec![all[2].clone()], vec![2]), | ||||
|         ]; | ||||
|  | ||||
|         MatchingWords::new(matching_words) | ||||
|         MatchingWords::new(matching_words).unwrap() | ||||
|     } | ||||
|  | ||||
|     impl MatcherBuilder<'_, Vec<u8>> { | ||||
| @@ -600,7 +600,7 @@ mod tests { | ||||
|         ]; | ||||
|         let matching_words = vec![(vec![all[0].clone()], vec![0]), (vec![all[1].clone()], vec![1])]; | ||||
|  | ||||
|         let matching_words = MatchingWords::new(matching_words); | ||||
|         let matching_words = MatchingWords::new(matching_words).unwrap(); | ||||
|  | ||||
|         let builder = MatcherBuilder::from_matching_words(matching_words); | ||||
|  | ||||
| @@ -847,7 +847,7 @@ mod tests { | ||||
|             (vec![all[4].clone()], vec![2]), | ||||
|         ]; | ||||
|  | ||||
|         let matching_words = MatchingWords::new(matching_words); | ||||
|         let matching_words = MatchingWords::new(matching_words).unwrap(); | ||||
|  | ||||
|         let mut builder = MatcherBuilder::from_matching_words(matching_words); | ||||
|         builder.highlight_prefix("_".to_string()); | ||||
|   | ||||
| @@ -747,7 +747,7 @@ fn create_matching_words( | ||||
|     let mut matching_word_cache = MatchingWordCache::default(); | ||||
|     let mut matching_words = Vec::new(); | ||||
|     ngrams(ctx, authorize_typos, query, &mut matching_words, &mut matching_word_cache, 0)?; | ||||
|     Ok(MatchingWords::new(matching_words)) | ||||
|     MatchingWords::new(matching_words) | ||||
| } | ||||
|  | ||||
| pub type PrimitiveQuery = Vec<PrimitiveQueryPart>; | ||||
| @@ -825,9 +825,13 @@ where | ||||
|                     quoted = !quoted; | ||||
|                 } | ||||
|                 // if there is a quote or a hard separator we close the phrase. | ||||
|                 if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard) | ||||
|                 { | ||||
|                     primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase))); | ||||
|                 if quote_count > 0 || separator_kind == SeparatorKind::Hard { | ||||
|                     let phrase = mem::take(&mut phrase); | ||||
|  | ||||
|                     // if the phrase only contains stop words, we don't keep it in the query. | ||||
|                     if phrase.iter().any(|w| w.is_some()) { | ||||
|                         primitive_query.push(PrimitiveQueryPart::Phrase(phrase)); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             _ => (), | ||||
| @@ -835,7 +839,7 @@ where | ||||
|     } | ||||
|  | ||||
|     // If a quote is never closed, we consider all of the end of the query as a phrase. | ||||
|     if !phrase.is_empty() { | ||||
|     if phrase.iter().any(|w| w.is_some()) { | ||||
|         primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase))); | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -32,15 +32,6 @@ fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) { | ||||
|     let result = search.execute().unwrap(); | ||||
|     // 1 document should match | ||||
|     assert_eq!(result.documents_ids.len(), 1); | ||||
|  | ||||
|     // test for a single stop word only, no other search terms | ||||
|     let mut search = Search::new(&txn, &index); | ||||
|     search.query("\"the\""); | ||||
|     search.limit(10); | ||||
|     search.authorize_typos(false); | ||||
|     search.terms_matching_strategy(TermsMatchingStrategy::All); | ||||
|     let result = search.execute().unwrap(); | ||||
|     assert_eq!(result.documents_ids.len(), 0); | ||||
| } | ||||
|  | ||||
| #[test] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user