mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Merge #220
220: Make hard separators split phrase query r=Kerollmops a=ManyTheFish Hard separators will now split a phrase query as two sequential phrases (double-quoted strings): the query `"Radioactive (Imagine Dragons)"` is considered equivalent to `"Radioactive" "Imagine Dragons"`, which has the small disadvantage of not keeping the order of the two (or more) separate phrases. Fix #208 Co-authored-by: many <maxime@meilisearch.com> Co-authored-by: Many <legendre.maxime.isn@gmail.com>
This commit is contained in:
		| @@ -1,7 +1,9 @@ | ||||
| use std::{fmt, cmp, mem}; | ||||
|  | ||||
| use fst::Set; | ||||
| use meilisearch_tokenizer::{TokenKind, tokenizer::TokenStream}; | ||||
| use meilisearch_tokenizer::token::SeparatorKind; | ||||
| use meilisearch_tokenizer::tokenizer::TokenStream; | ||||
| use meilisearch_tokenizer::TokenKind; | ||||
| use roaring::RoaringBitmap; | ||||
| use slice_group_by::GroupBy; | ||||
|  | ||||
| @@ -467,13 +469,14 @@ fn create_primitive_query(query: TokenStream, stop_words: Option<Set<&[u8]>>, wo | ||||
|                     primitive_query.push(PrimitiveQueryPart::Word(token.word.to_string(), true)); | ||||
|                 } | ||||
|             }, | ||||
|             TokenKind::Separator(_) => { | ||||
|             TokenKind::Separator(separator_kind) => { | ||||
|                 let quote_count = token.word.chars().filter(|&s| s == '"').count(); | ||||
|                 // swap quoted state if we encounter a double quote | ||||
|                 if quote_count % 2 != 0 { | ||||
|                     quoted = !quoted; | ||||
|                 } | ||||
|                 if !phrase.is_empty() && quote_count > 0 { | ||||
|                 // if there is a quote or a hard separator we close the phrase. | ||||
|                 if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard) { | ||||
|                     primitive_query.push(PrimitiveQueryPart::Phrase(mem::take(&mut phrase))); | ||||
|                 } | ||||
|             }, | ||||
| @@ -798,6 +801,29 @@ mod test { | ||||
|         assert_eq!(expected, query_tree); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn phrase_with_hard_separator() { | ||||
|         let query = "\"hey friends. wooop wooop\""; | ||||
|         let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default()); | ||||
|         let result = analyzer.analyze(query); | ||||
|         let tokens = result.tokens(); | ||||
|  | ||||
|         let expected = Operation::And(vec![ | ||||
|             Operation::Consecutive(vec![ | ||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("hey".to_string()) }), | ||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("friends".to_string()) }), | ||||
|             ]), | ||||
|             Operation::Consecutive(vec![ | ||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), | ||||
|                 Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }), | ||||
|             ]), | ||||
|         ]); | ||||
|  | ||||
|         let (query_tree, _) = TestContext::default().build(false, true, None, tokens).unwrap().unwrap(); | ||||
|  | ||||
|         assert_eq!(expected, query_tree); | ||||
|     } | ||||
|  | ||||
|     #[test] | ||||
|     fn optional_word() { | ||||
|         let query = "hey my friend "; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user