	Merge #5062
5062: Fix bugs for v1.12 r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue

Fixes #4984
Fixes https://github.com/meilisearch/meilisearch/issues/4974
Fixes [SDK test](https://github.com/meilisearch/meilisearch/actions/runs/11886701996/job/33118278794)

## What does this PR do?
- add 3 tests
- fix bugs

Co-authored-by: ManyTheFish <many@meilisearch.com>
```diff
@@ -274,7 +274,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
                     last_match_last_token_position_plus_one
                 } else {
                     // we have matched the end of possible tokens, there's nothing to advance
-                    tokens.len() - 1
+                    tokens.len()
                 }
             };
```
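The off-by-one above matters because `last_match_last_token_position_plus_one` is, by its name, an exclusive end bound: once matching has consumed every token, the bound must be `tokens.len()`, not `tokens.len() - 1`, or the final token is silently cropped away. A minimal sketch of that boundary behavior (hypothetical `crop_window` helper, not Meilisearch's actual matcher):

```rust
// Exclusive end bounds: `tokens[first..last_plus_one]` keeps the token at
// `last_plus_one - 1`, so matching to the very end needs `tokens.len()`.
fn crop_window<'a>(tokens: &[&'a str], first: usize, last_plus_one: usize) -> Vec<&'a str> {
    tokens[first..last_plus_one].to_vec()
}

fn main() {
    let tokens = ["the", "quick", "brown", "fox"];
    // Correct bound: the final token "fox" is kept.
    assert_eq!(crop_window(&tokens, 2, tokens.len()), vec!["brown", "fox"]);
    // The old `len() - 1` bound would have dropped it.
    assert_eq!(crop_window(&tokens, 2, tokens.len() - 1), vec!["brown"]);
}
```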
```diff
@@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
     if words.is_empty() {
         return Ok(RoaringBitmap::new());
     }
-    let mut candidates = RoaringBitmap::new();
+    let mut candidates = None;
     for word in words.iter().flatten().copied() {
         if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
-            candidates |= word_docids;
+            if let Some(candidates) = candidates.as_mut() {
+                *candidates &= word_docids;
+            } else {
+                candidates = Some(word_docids);
+            }
         } else {
             return Ok(RoaringBitmap::new());
         }
     }
 
+    let Some(mut candidates) = candidates else {
+        return Ok(RoaringBitmap::new());
+    };
+
     let winsize = words.len().min(3);
 
     for win in words.windows(winsize) {
```
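This hunk changes phrase resolution from a union (`|=`) over per-word docids to an intersection (`&=`): a document can only contain the phrase if it contains every one of its words, and the new `let ... else` returns early when no word contributed any docids. A self-contained sketch of the restored semantics, using `BTreeSet<u32>` as a stand-in for `RoaringBitmap` (names here are illustrative, not Meilisearch's):

```rust
use std::collections::BTreeSet;

// A document is a phrase candidate only if it appears in the docids of
// *every* word, so the sets must be intersected, not unioned.
fn phrase_candidates(per_word_docids: &[BTreeSet<u32>]) -> BTreeSet<u32> {
    let mut iter = per_word_docids.iter();
    // No words with docids means no candidates, mirroring the new let-else.
    let Some(first) = iter.next() else {
        return BTreeSet::new();
    };
    let mut candidates = first.clone();
    for docids in iter {
        // Keep only documents that contain all words seen so far.
        candidates = &candidates & docids;
    }
    candidates
}

fn main() {
    let quick = BTreeSet::from([1, 2, 3]);
    let fox = BTreeSet::from([2, 3, 4]);
    // Only documents 2 and 3 contain both words; a union would also have
    // kept 1 and 4, which is the bug the hunk fixes.
    assert_eq!(phrase_candidates(&[quick, fox]), BTreeSet::from([2, 3]));
}
```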
```diff
@@ -58,9 +58,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
         .map(|s| s.iter().map(String::as_str).collect());
     let old_dictionary: Option<Vec<_>> =
         settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-    let del_builder =
+    let mut del_builder =
         tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
-    let del_tokenizer = del_builder.into_tokenizer();
+    let del_tokenizer = del_builder.build();
 
     let new_stop_words = settings_diff.new.stop_words.as_ref();
     let new_separators: Option<Vec<_>> = settings_diff
@@ -70,9 +70,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
         .map(|s| s.iter().map(String::as_str).collect());
     let new_dictionary: Option<Vec<_>> =
         settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-    let add_builder =
+    let mut add_builder =
         tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
-    let add_tokenizer = add_builder.into_tokenizer();
+    let add_tokenizer = add_builder.build();
 
     // iterate over documents.
     let mut cursor = obkv_documents.into_cursor()?;
```
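The last hunk swaps both tokenizers from the consuming `into_tokenizer()` to `build()`, and the two bindings gain `mut`, which implies `build()` takes the builder by mutable reference and hands back a tokenizer borrowing from it. A hedged sketch of that builder shape (hypothetical `TokenizerBuilder`/`Tokenizer` types, not charabia's actual API):

```rust
struct TokenizerBuilder {
    stop_words: Vec<String>,
}

struct Tokenizer<'b> {
    stop_words: &'b [String],
}

impl TokenizerBuilder {
    // `build` borrows the builder mutably, mirroring the `&mut self`
    // signature implied by the new `let mut` bindings in the hunk.
    fn build(&mut self) -> Tokenizer<'_> {
        Tokenizer { stop_words: &self.stop_words }
    }
}

fn main() {
    // The builder must outlive every tokenizer produced by `build`.
    let mut builder = TokenizerBuilder { stop_words: vec!["the".into()] };
    let tokenizer = builder.build();
    assert_eq!(tokenizer.stop_words, ["the".to_string()]);
}
```

Under this shape the builder owns the configuration and each call to `build()` hands out a view of it, so the configured stop words, separators, and dictionary stay attached to the tokenizer for the builder's lifetime.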