mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-31 07:56:28 +00:00 
			
		
		
		
	Take stop words into account
This commit is contained in:
		| @@ -21,6 +21,7 @@ pub fn extract_docid_word_positions<R: io::Read>( | ||||
|     mut obkv_documents: grenad::Reader<R>, | ||||
|     indexer: GrenadParameters, | ||||
|     searchable_fields: &Option<HashSet<FieldId>>, | ||||
|     stop_words: Option<&fst::Set<&[u8]>>, | ||||
| ) -> Result<(RoaringBitmap, grenad::Reader<File>)> { | ||||
|     let max_memory = indexer.max_memory_by_thread(); | ||||
|  | ||||
| @@ -35,6 +36,10 @@ pub fn extract_docid_word_positions<R: io::Read>( | ||||
|  | ||||
|     let mut key_buffer = Vec::new(); | ||||
|     let mut field_buffer = String::new(); | ||||
|     let mut config = AnalyzerConfig::default(); | ||||
|     if let Some(stop_words) = stop_words { | ||||
|         config.stop_words(stop_words); | ||||
|     } | ||||
|     let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default()); | ||||
|  | ||||
|     while let Some((key, value)) = obkv_documents.next()? { | ||||
|   | ||||
| @@ -37,6 +37,7 @@ pub(crate) fn data_from_obkv_documents( | ||||
|     lmdb_writer_sx: Sender<TypedChunk>, | ||||
|     searchable_fields: Option<HashSet<FieldId>>, | ||||
|     faceted_fields: HashSet<FieldId>, | ||||
|     stop_words: Option<fst::Set<&[u8]>>, | ||||
| ) -> Result<()> { | ||||
|     let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks | ||||
|         .par_bridge() | ||||
| @@ -54,6 +55,7 @@ pub(crate) fn data_from_obkv_documents( | ||||
|                         documents_chunk.clone(), | ||||
|                         indexer.clone(), | ||||
|                         &searchable_fields, | ||||
|                         stop_words.as_ref(), | ||||
|                     )?; | ||||
|  | ||||
|                     // send documents_ids to DB writer | ||||
|   | ||||
| @@ -231,6 +231,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|         // get filterable fields for facet databases | ||||
|         let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?; | ||||
|  | ||||
|         let stop_words = self.index.stop_words(self.wtxn)?; | ||||
|         // let stop_words = stop_words.as_ref(); | ||||
|  | ||||
|         // Run extraction pipeline in parallel. | ||||
|         pool.install(|| { | ||||
|             let params = GrenadParameters { | ||||
| @@ -255,6 +258,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                 lmdb_writer_sx, | ||||
|                 searchable_fields, | ||||
|                 faceted_fields, | ||||
|                 stop_words, | ||||
|             ) | ||||
|             .unwrap(); | ||||
|         }); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user