mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-08-02 11:50:03 +00:00
Make max_position_per_attributes changable
This commit is contained in:
@ -23,7 +23,10 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
|
||||
let max_positions_per_attributes = max_positions_per_attributes
|
||||
.map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut documents_ids = RoaringBitmap::new();
|
||||
@ -62,7 +65,7 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
||||
if let Some(field) = json_to_string(&value, &mut field_buffer) {
|
||||
let analyzed = analyzer.analyze(field);
|
||||
let tokens = process_tokens(analyzed.tokens())
|
||||
.take_while(|(p, _)| (*p as u32) < MAX_POSITION_PER_ATTRIBUTE);
|
||||
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
|
||||
|
||||
for (index, token) in tokens {
|
||||
let token = token.text().trim();
|
||||
|
@ -42,6 +42,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
primary_key_id: FieldId,
|
||||
geo_field_id: Option<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<()> {
|
||||
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
|
||||
.par_bridge()
|
||||
@ -55,6 +56,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
primary_key_id,
|
||||
geo_field_id,
|
||||
&stop_words,
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
@ -177,6 +179,7 @@ fn extract_documents_data(
|
||||
primary_key_id: FieldId,
|
||||
geo_field_id: Option<FieldId>,
|
||||
stop_words: &Option<fst::Set<&[u8]>>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
(grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>),
|
||||
@ -206,6 +209,7 @@ fn extract_documents_data(
|
||||
indexer.clone(),
|
||||
searchable_fields,
|
||||
stop_words.as_ref(),
|
||||
max_positions_per_attributes,
|
||||
)?;
|
||||
|
||||
// send documents_ids to DB writer
|
||||
|
@ -68,6 +68,7 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
pub(crate) chunk_compression_type: CompressionType,
|
||||
pub(crate) chunk_compression_level: Option<u32>,
|
||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
||||
pub(crate) max_positions_per_attributes: Option<u32>,
|
||||
facet_level_group_size: Option<NonZeroUsize>,
|
||||
facet_min_level_size: Option<NonZeroUsize>,
|
||||
words_prefix_threshold: Option<u32>,
|
||||
@ -104,6 +105,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
autogenerate_docids: false,
|
||||
update_id,
|
||||
max_positions_per_attributes: None,
|
||||
}
|
||||
}
|
||||
|
||||
@ -262,6 +264,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
primary_key_id,
|
||||
geo_field_id,
|
||||
stop_words,
|
||||
self.max_positions_per_attributes,
|
||||
)
|
||||
});
|
||||
|
||||
@ -284,6 +287,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
chunk_compression_type: self.chunk_compression_type,
|
||||
chunk_compression_level: self.chunk_compression_level,
|
||||
thread_pool: self.thread_pool,
|
||||
max_positions_per_attributes: self.max_positions_per_attributes,
|
||||
update_id: self.update_id,
|
||||
};
|
||||
let mut deletion_builder = update_builder.delete_documents(self.wtxn, self.index)?;
|
||||
|
Reference in New Issue
Block a user