mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-10-30 23:46:28 +00:00 
			
		
		
		
	Make max_positions_per_attributes changeable
This commit is contained in:
		| @@ -23,7 +23,10 @@ pub fn extract_docid_word_positions<R: io::Read>( | ||||
|     indexer: GrenadParameters, | ||||
|     searchable_fields: &Option<HashSet<FieldId>>, | ||||
|     stop_words: Option<&fst::Set<&[u8]>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<(RoaringBitmap, grenad::Reader<File>)> { | ||||
|     let max_positions_per_attributes = max_positions_per_attributes | ||||
|         .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE)); | ||||
|     let max_memory = indexer.max_memory_by_thread(); | ||||
|  | ||||
|     let mut documents_ids = RoaringBitmap::new(); | ||||
| @@ -62,7 +65,7 @@ pub fn extract_docid_word_positions<R: io::Read>( | ||||
|                 if let Some(field) = json_to_string(&value, &mut field_buffer) { | ||||
|                     let analyzed = analyzer.analyze(field); | ||||
|                     let tokens = process_tokens(analyzed.tokens()) | ||||
|                         .take_while(|(p, _)| (*p as u32) < MAX_POSITION_PER_ATTRIBUTE); | ||||
|                         .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes); | ||||
|  | ||||
|                     for (index, token) in tokens { | ||||
|                         let token = token.text().trim(); | ||||
|   | ||||
| @@ -42,6 +42,7 @@ pub(crate) fn data_from_obkv_documents( | ||||
|     primary_key_id: FieldId, | ||||
|     geo_field_id: Option<FieldId>, | ||||
|     stop_words: Option<fst::Set<&[u8]>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<()> { | ||||
|     let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks | ||||
|         .par_bridge() | ||||
| @@ -55,6 +56,7 @@ pub(crate) fn data_from_obkv_documents( | ||||
|                 primary_key_id, | ||||
|                 geo_field_id, | ||||
|                 &stop_words, | ||||
|                 max_positions_per_attributes, | ||||
|             ) | ||||
|         }) | ||||
|         .collect(); | ||||
| @@ -177,6 +179,7 @@ fn extract_documents_data( | ||||
|     primary_key_id: FieldId, | ||||
|     geo_field_id: Option<FieldId>, | ||||
|     stop_words: &Option<fst::Set<&[u8]>>, | ||||
|     max_positions_per_attributes: Option<u32>, | ||||
| ) -> Result<( | ||||
|     grenad::Reader<CursorClonableMmap>, | ||||
|     (grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>), | ||||
| @@ -206,6 +209,7 @@ fn extract_documents_data( | ||||
|                     indexer.clone(), | ||||
|                     searchable_fields, | ||||
|                     stop_words.as_ref(), | ||||
|                     max_positions_per_attributes, | ||||
|                 )?; | ||||
|  | ||||
|                 // send documents_ids to DB writer | ||||
|   | ||||
| @@ -68,6 +68,7 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|     pub(crate) chunk_compression_type: CompressionType, | ||||
|     pub(crate) chunk_compression_level: Option<u32>, | ||||
|     pub(crate) thread_pool: Option<&'a ThreadPool>, | ||||
|     pub(crate) max_positions_per_attributes: Option<u32>, | ||||
|     facet_level_group_size: Option<NonZeroUsize>, | ||||
|     facet_min_level_size: Option<NonZeroUsize>, | ||||
|     words_prefix_threshold: Option<u32>, | ||||
| @@ -104,6 +105,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|             update_method: IndexDocumentsMethod::ReplaceDocuments, | ||||
|             autogenerate_docids: false, | ||||
|             update_id, | ||||
|             max_positions_per_attributes: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -262,6 +264,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                     primary_key_id, | ||||
|                     geo_field_id, | ||||
|                     stop_words, | ||||
|                     self.max_positions_per_attributes, | ||||
|                 ) | ||||
|             }); | ||||
|  | ||||
| @@ -284,6 +287,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { | ||||
|                 chunk_compression_type: self.chunk_compression_type, | ||||
|                 chunk_compression_level: self.chunk_compression_level, | ||||
|                 thread_pool: self.thread_pool, | ||||
|                 max_positions_per_attributes: self.max_positions_per_attributes, | ||||
|                 update_id: self.update_id, | ||||
|             }; | ||||
|             let mut deletion_builder = update_builder.delete_documents(self.wtxn, self.index)?; | ||||
|   | ||||
| @@ -69,6 +69,7 @@ pub struct Settings<'a, 't, 'u, 'i> { | ||||
|     pub(crate) chunk_compression_type: CompressionType, | ||||
|     pub(crate) chunk_compression_level: Option<u32>, | ||||
|     pub(crate) thread_pool: Option<&'a ThreadPool>, | ||||
|     pub(crate) max_positions_per_attributes: Option<u32>, | ||||
|     update_id: u64, | ||||
|  | ||||
|     searchable_fields: Setting<Vec<String>>, | ||||
| @@ -108,6 +109,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|             synonyms: Setting::NotSet, | ||||
|             primary_key: Setting::NotSet, | ||||
|             update_id, | ||||
|             max_positions_per_attributes: None, | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -237,6 +239,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { | ||||
|         indexing_builder.chunk_compression_type = self.chunk_compression_type; | ||||
|         indexing_builder.chunk_compression_level = self.chunk_compression_level; | ||||
|         indexing_builder.thread_pool = self.thread_pool; | ||||
|         indexing_builder.max_positions_per_attributes = self.max_positions_per_attributes; | ||||
|         indexing_builder.execute_raw(output, &cb)?; | ||||
|  | ||||
|         Ok(()) | ||||
|   | ||||
| @@ -12,6 +12,7 @@ pub struct UpdateBuilder<'a> { | ||||
|     pub(crate) chunk_compression_type: CompressionType, | ||||
|     pub(crate) chunk_compression_level: Option<u32>, | ||||
|     pub(crate) thread_pool: Option<&'a ThreadPool>, | ||||
|     pub(crate) max_positions_per_attributes: Option<u32>, | ||||
|     pub(crate) update_id: u64, | ||||
| } | ||||
|  | ||||
| @@ -25,6 +26,7 @@ impl<'a> UpdateBuilder<'a> { | ||||
|             chunk_compression_type: CompressionType::None, | ||||
|             chunk_compression_level: None, | ||||
|             thread_pool: None, | ||||
|             max_positions_per_attributes: None, | ||||
|             update_id, | ||||
|         } | ||||
|     } | ||||
| @@ -57,6 +59,10 @@ impl<'a> UpdateBuilder<'a> { | ||||
|         self.thread_pool = Some(thread_pool); | ||||
|     } | ||||
|  | ||||
|     pub fn max_positions_per_attributes(&mut self, max_positions_per_attributes: u32) { | ||||
|         self.max_positions_per_attributes = Some(max_positions_per_attributes); | ||||
|     } | ||||
|  | ||||
|     pub fn clear_documents<'t, 'u, 'i>( | ||||
|         self, | ||||
|         wtxn: &'t mut heed::RwTxn<'i, 'u>, | ||||
| @@ -87,6 +93,7 @@ impl<'a> UpdateBuilder<'a> { | ||||
|         builder.chunk_compression_type = self.chunk_compression_type; | ||||
|         builder.chunk_compression_level = self.chunk_compression_level; | ||||
|         builder.thread_pool = self.thread_pool; | ||||
|         builder.max_positions_per_attributes = self.max_positions_per_attributes; | ||||
|  | ||||
|         builder | ||||
|     } | ||||
| @@ -105,6 +112,7 @@ impl<'a> UpdateBuilder<'a> { | ||||
|         builder.chunk_compression_type = self.chunk_compression_type; | ||||
|         builder.chunk_compression_level = self.chunk_compression_level; | ||||
|         builder.thread_pool = self.thread_pool; | ||||
|         builder.max_positions_per_attributes = self.max_positions_per_attributes; | ||||
|  | ||||
|         builder | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user