mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Make the search and the indexing work
This commit is contained in:
		| @@ -1094,10 +1094,7 @@ impl Index { | ||||
|  | ||||
|     /* separators easing method */ | ||||
|  | ||||
|     pub(crate) fn allowed_separators<'t>( | ||||
|         &self, | ||||
|         rtxn: &'t RoTxn, | ||||
|     ) -> Result<Option<BTreeSet<String>>> { | ||||
|     pub fn allowed_separators<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> { | ||||
|         let default_separators = | ||||
|             charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string()); | ||||
|         let mut separators: Option<BTreeSet<_>> = None; | ||||
|   | ||||
| @@ -479,6 +479,20 @@ pub fn execute_search( | ||||
|             tokbuilder.stop_words(stop_words); | ||||
|         } | ||||
|  | ||||
|         let separators = ctx.index.allowed_separators(ctx.txn)?; | ||||
|         let separators: Option<Vec<_>> = | ||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         if let Some(ref separators) = separators { | ||||
|             tokbuilder.separators(separators); | ||||
|         } | ||||
|  | ||||
|         let dictionary = ctx.index.dictionary(ctx.txn)?; | ||||
|         let dictionary: Option<Vec<_>> = | ||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         if let Some(ref dictionary) = dictionary { | ||||
|             tokbuilder.words_dict(dictionary); | ||||
|         } | ||||
|  | ||||
|         let script_lang_map = ctx.index.script_language(ctx.txn)?; | ||||
|         if !script_lang_map.is_empty() { | ||||
|             tokbuilder.allow_list(&script_lang_map); | ||||
|   | ||||
| @@ -28,6 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>( | ||||
|     indexer: GrenadParameters, | ||||
|     searchable_fields: &Option<HashSet<FieldId>>, | ||||
|     stop_words: Option<&fst::Set<&[u8]>>, | ||||
|     allowed_separators: Option<&Vec<&str>>, | ||||
|     dictionary: Option<&Vec<&str>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> { | ||||
|     puffin::profile_function!(); | ||||
| @@ -52,6 +54,14 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>( | ||||
|     if let Some(stop_words) = stop_words { | ||||
|         tokenizer_builder.stop_words(stop_words); | ||||
|     } | ||||
|     if let Some(dictionary) = dictionary { | ||||
|         // let dictionary: Vec<_> = dictionary.iter().map(String::as_str).collect(); | ||||
|         tokenizer_builder.words_dict(dictionary.as_slice()); | ||||
|     } | ||||
|     if let Some(separators) = allowed_separators { | ||||
|         // let separators: Vec<_> = separators.iter().map(String::as_str).collect(); | ||||
|         tokenizer_builder.separators(separators.as_slice()); | ||||
|     } | ||||
|     let tokenizer = tokenizer_builder.build(); | ||||
|  | ||||
|     let mut cursor = obkv_documents.into_cursor()?; | ||||
|   | ||||
| @@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents( | ||||
|     geo_fields_ids: Option<(FieldId, FieldId)>, | ||||
|     vectors_field_id: Option<FieldId>, | ||||
|     stop_words: Option<fst::Set<&[u8]>>, | ||||
|     allowed_separators: Option<Vec<&str>>, | ||||
|     dictionary: Option<Vec<&str>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
|     exact_attributes: HashSet<FieldId>, | ||||
| ) -> Result<()> { | ||||
| @@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents( | ||||
|                     geo_fields_ids, | ||||
|                     vectors_field_id, | ||||
|                     &stop_words, | ||||
|                     &allowed_separators, | ||||
|                     &dictionary, | ||||
|                     max_positions_per_attributes, | ||||
|                 ) | ||||
|             }) | ||||
| @@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data( | ||||
|     geo_fields_ids: Option<(FieldId, FieldId)>, | ||||
|     vectors_field_id: Option<FieldId>, | ||||
|     stop_words: &Option<fst::Set<&[u8]>>, | ||||
|     allowed_separators: &Option<Vec<&str>>, | ||||
|     dictionary: &Option<Vec<&str>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<( | ||||
|     grenad::Reader<CursorClonableMmap>, | ||||
| @@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data( | ||||
|                         indexer, | ||||
|                         searchable_fields, | ||||
|                         stop_words.as_ref(), | ||||
|                         allowed_separators.as_ref(), | ||||
|                         dictionary.as_ref(), | ||||
|                         max_positions_per_attributes, | ||||
|                     )?; | ||||
|  | ||||
|   | ||||
| @@ -316,6 +316,12 @@ where | ||||
|         let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors"); | ||||
|  | ||||
|         let stop_words = self.index.stop_words(self.wtxn)?; | ||||
|         let separators = self.index.allowed_separators(self.wtxn)?; | ||||
|         let separators: Option<Vec<_>> = | ||||
|             separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         let dictionary = self.index.dictionary(self.wtxn)?; | ||||
|         let dictionary: Option<Vec<_>> = | ||||
|             dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); | ||||
|         let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?; | ||||
|  | ||||
|         let pool_params = GrenadParameters { | ||||
| @@ -353,6 +359,8 @@ where | ||||
|                     geo_fields_ids, | ||||
|                     vectors_field_id, | ||||
|                     stop_words, | ||||
|                     separators, | ||||
|                     dictionary, | ||||
|                     max_positions_per_attributes, | ||||
|                     exact_attributes, | ||||
|                 ) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user