mirror of
				https://github.com/meilisearch/meilisearch.git
				synced 2025-11-04 09:56:28 +00:00 
			
		
		
		
	Don't sort in parallel in sorters of the new indexer
This commit is contained in:
		@@ -40,6 +40,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory,
 | 
					        max_memory,
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // initialize buffers.
 | 
					    // initialize buffers.
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -32,6 +32,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory,
 | 
					        max_memory,
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut buffer = Vec::new();
 | 
					    let mut buffer = Vec::new();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -61,6 +61,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 2),
 | 
					        max_memory.map(|m| m / 2),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut normalized_facet_string_docids_sorter = create_sorter(
 | 
					    let mut normalized_facet_string_docids_sorter = create_sorter(
 | 
				
			||||||
@@ -70,6 +71,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 2),
 | 
					        max_memory.map(|m| m / 2),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut buffer = Vec::new();
 | 
					    let mut buffer = Vec::new();
 | 
				
			||||||
@@ -149,6 +151,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 2),
 | 
					        max_memory.map(|m| m / 2),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut normalized_facet_string_docids_sorter = create_sorter(
 | 
					    let mut normalized_facet_string_docids_sorter = create_sorter(
 | 
				
			||||||
@@ -158,6 +161,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 2),
 | 
					        max_memory.map(|m| m / 2),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut buffer = Vec::new();
 | 
					    let mut buffer = Vec::new();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -53,6 +53,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 2),
 | 
					        max_memory.map(|m| m / 2),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut fid_docid_facet_strings_sorter = create_sorter(
 | 
					    let mut fid_docid_facet_strings_sorter = create_sorter(
 | 
				
			||||||
@@ -62,6 +63,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 2),
 | 
					        max_memory.map(|m| m / 2),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // The tuples represents the Del and Add side for a bitmap
 | 
					    // The tuples represents the Del and Add side for a bitmap
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -35,6 +35,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory,
 | 
					        max_memory,
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut key_buffer = Vec::new();
 | 
					    let mut key_buffer = Vec::new();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -44,6 +44,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 3),
 | 
					        max_memory.map(|m| m / 3),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
    let mut key_buffer = Vec::new();
 | 
					    let mut key_buffer = Vec::new();
 | 
				
			||||||
    let mut del_words = BTreeSet::new();
 | 
					    let mut del_words = BTreeSet::new();
 | 
				
			||||||
@@ -98,6 +99,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 3),
 | 
					        max_memory.map(|m| m / 3),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut exact_word_docids_sorter = create_sorter(
 | 
					    let mut exact_word_docids_sorter = create_sorter(
 | 
				
			||||||
@@ -107,6 +109,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory.map(|m| m / 3),
 | 
					        max_memory.map(|m| m / 3),
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
 | 
					    let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -49,6 +49,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
                indexer.chunk_compression_level,
 | 
					                indexer.chunk_compression_level,
 | 
				
			||||||
                indexer.max_nb_chunks,
 | 
					                indexer.max_nb_chunks,
 | 
				
			||||||
                max_memory.map(|m| m / MAX_DISTANCE as usize),
 | 
					                max_memory.map(|m| m / MAX_DISTANCE as usize),
 | 
				
			||||||
 | 
					                true,
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        })
 | 
					        })
 | 
				
			||||||
        .collect();
 | 
					        .collect();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -33,6 +33,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
 | 
				
			|||||||
        indexer.chunk_compression_level,
 | 
					        indexer.chunk_compression_level,
 | 
				
			||||||
        indexer.max_nb_chunks,
 | 
					        indexer.max_nb_chunks,
 | 
				
			||||||
        max_memory,
 | 
					        max_memory,
 | 
				
			||||||
 | 
					        true,
 | 
				
			||||||
    );
 | 
					    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
 | 
					    let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -37,6 +37,7 @@ pub fn create_sorter<MF: MergeFunction>(
 | 
				
			|||||||
    chunk_compression_level: Option<u32>,
 | 
					    chunk_compression_level: Option<u32>,
 | 
				
			||||||
    max_nb_chunks: Option<usize>,
 | 
					    max_nb_chunks: Option<usize>,
 | 
				
			||||||
    max_memory: Option<usize>,
 | 
					    max_memory: Option<usize>,
 | 
				
			||||||
 | 
					    sort_in_parallel: bool,
 | 
				
			||||||
) -> grenad::Sorter<MF> {
 | 
					) -> grenad::Sorter<MF> {
 | 
				
			||||||
    let mut builder = grenad::Sorter::builder(merge);
 | 
					    let mut builder = grenad::Sorter::builder(merge);
 | 
				
			||||||
    builder.chunk_compression_type(chunk_compression_type);
 | 
					    builder.chunk_compression_type(chunk_compression_type);
 | 
				
			||||||
@@ -51,7 +52,7 @@ pub fn create_sorter<MF: MergeFunction>(
 | 
				
			|||||||
        builder.allow_realloc(false);
 | 
					        builder.allow_realloc(false);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    builder.sort_algorithm(sort_algorithm);
 | 
					    builder.sort_algorithm(sort_algorithm);
 | 
				
			||||||
    builder.sort_in_parallel(true);
 | 
					    builder.sort_in_parallel(sort_in_parallel);
 | 
				
			||||||
    builder.build()
 | 
					    builder.build()
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -127,6 +127,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
				
			|||||||
            indexer_settings.chunk_compression_level,
 | 
					            indexer_settings.chunk_compression_level,
 | 
				
			||||||
            indexer_settings.max_nb_chunks,
 | 
					            indexer_settings.max_nb_chunks,
 | 
				
			||||||
            indexer_settings.max_memory.map(|mem| mem / 2),
 | 
					            indexer_settings.max_memory.map(|mem| mem / 2),
 | 
				
			||||||
 | 
					            true,
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // We initialize the sorter with the user indexing settings.
 | 
					        // We initialize the sorter with the user indexing settings.
 | 
				
			||||||
@@ -137,6 +138,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
				
			|||||||
            indexer_settings.chunk_compression_level,
 | 
					            indexer_settings.chunk_compression_level,
 | 
				
			||||||
            indexer_settings.max_nb_chunks,
 | 
					            indexer_settings.max_nb_chunks,
 | 
				
			||||||
            indexer_settings.max_memory.map(|mem| mem / 2),
 | 
					            indexer_settings.max_memory.map(|mem| mem / 2),
 | 
				
			||||||
 | 
					            true,
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        let documents_ids = index.documents_ids(wtxn)?;
 | 
					        let documents_ids = index.documents_ids(wtxn)?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -988,6 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
				
			|||||||
                self.indexer_settings.chunk_compression_level,
 | 
					                self.indexer_settings.chunk_compression_level,
 | 
				
			||||||
                self.indexer_settings.max_nb_chunks,
 | 
					                self.indexer_settings.max_nb_chunks,
 | 
				
			||||||
                self.indexer_settings.max_memory.map(|mem| mem / 2),
 | 
					                self.indexer_settings.max_memory.map(|mem| mem / 2),
 | 
				
			||||||
 | 
					                true,
 | 
				
			||||||
            ))
 | 
					            ))
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
            None
 | 
					            None
 | 
				
			||||||
@@ -1030,6 +1033,7 @@ impl<'a, 'i> Transform<'a, 'i> {
 | 
				
			|||||||
                    self.indexer_settings.chunk_compression_level,
 | 
					                    self.indexer_settings.chunk_compression_level,
 | 
				
			||||||
                    self.indexer_settings.max_nb_chunks,
 | 
					                    self.indexer_settings.max_nb_chunks,
 | 
				
			||||||
                    self.indexer_settings.max_memory.map(|mem| mem / 2),
 | 
					                    self.indexer_settings.max_memory.map(|mem| mem / 2),
 | 
				
			||||||
 | 
					                    true,
 | 
				
			||||||
                ))
 | 
					                ))
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                None
 | 
					                None
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -46,6 +46,10 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> {
 | 
				
			|||||||
                self.grenad_parameters.chunk_compression_level,
 | 
					                self.grenad_parameters.chunk_compression_level,
 | 
				
			||||||
                self.grenad_parameters.max_nb_chunks,
 | 
					                self.grenad_parameters.max_nb_chunks,
 | 
				
			||||||
                self.max_memory,
 | 
					                self.max_memory,
 | 
				
			||||||
 | 
					                // *NOTE*: this must not be set to true:
 | 
				
			||||||
 | 
					                // 1. we're already using max parallelism in the pool, so it wouldn't help
 | 
				
			||||||
 | 
					                // 2. it creates correctness issues if it causes to yield a borrow-mut wielding task
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        ))))
 | 
					        ))))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -48,6 +48,7 @@ impl WordDocidsCachedSorters {
 | 
				
			|||||||
                indexer.chunk_compression_level,
 | 
					                indexer.chunk_compression_level,
 | 
				
			||||||
                indexer.max_nb_chunks,
 | 
					                indexer.max_nb_chunks,
 | 
				
			||||||
                max_memory,
 | 
					                max_memory,
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        let word_docids = CboCachedSorter::new(
 | 
					        let word_docids = CboCachedSorter::new(
 | 
				
			||||||
@@ -59,6 +60,7 @@ impl WordDocidsCachedSorters {
 | 
				
			|||||||
                indexer.chunk_compression_level,
 | 
					                indexer.chunk_compression_level,
 | 
				
			||||||
                indexer.max_nb_chunks,
 | 
					                indexer.max_nb_chunks,
 | 
				
			||||||
                max_memory,
 | 
					                max_memory,
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        let exact_word_docids = CboCachedSorter::new(
 | 
					        let exact_word_docids = CboCachedSorter::new(
 | 
				
			||||||
@@ -70,6 +72,7 @@ impl WordDocidsCachedSorters {
 | 
				
			|||||||
                indexer.chunk_compression_level,
 | 
					                indexer.chunk_compression_level,
 | 
				
			||||||
                indexer.max_nb_chunks,
 | 
					                indexer.max_nb_chunks,
 | 
				
			||||||
                max_memory,
 | 
					                max_memory,
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        let word_position_docids = CboCachedSorter::new(
 | 
					        let word_position_docids = CboCachedSorter::new(
 | 
				
			||||||
@@ -81,6 +84,7 @@ impl WordDocidsCachedSorters {
 | 
				
			|||||||
                indexer.chunk_compression_level,
 | 
					                indexer.chunk_compression_level,
 | 
				
			||||||
                indexer.max_nb_chunks,
 | 
					                indexer.max_nb_chunks,
 | 
				
			||||||
                max_memory,
 | 
					                max_memory,
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
        let fid_word_count_docids = CboCachedSorter::new(
 | 
					        let fid_word_count_docids = CboCachedSorter::new(
 | 
				
			||||||
@@ -92,6 +96,7 @@ impl WordDocidsCachedSorters {
 | 
				
			|||||||
                indexer.chunk_compression_level,
 | 
					                indexer.chunk_compression_level,
 | 
				
			||||||
                indexer.max_nb_chunks,
 | 
					                indexer.max_nb_chunks,
 | 
				
			||||||
                max_memory,
 | 
					                max_memory,
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -50,6 +50,7 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
 | 
				
			|||||||
                self.grenad_parameters.chunk_compression_level,
 | 
					                self.grenad_parameters.chunk_compression_level,
 | 
				
			||||||
                self.grenad_parameters.max_nb_chunks,
 | 
					                self.grenad_parameters.max_nb_chunks,
 | 
				
			||||||
                self.max_memory,
 | 
					                self.max_memory,
 | 
				
			||||||
 | 
					                false,
 | 
				
			||||||
            ),
 | 
					            ),
 | 
				
			||||||
        ))))
 | 
					        ))))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -60,6 +60,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
 | 
				
			|||||||
            self.chunk_compression_level,
 | 
					            self.chunk_compression_level,
 | 
				
			||||||
            self.max_nb_chunks,
 | 
					            self.max_nb_chunks,
 | 
				
			||||||
            self.max_memory,
 | 
					            self.max_memory,
 | 
				
			||||||
 | 
					            true,
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if !common_prefix_fst_words.is_empty() {
 | 
					        if !common_prefix_fst_words.is_empty() {
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -65,6 +65,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
 | 
				
			|||||||
            self.chunk_compression_level,
 | 
					            self.chunk_compression_level,
 | 
				
			||||||
            self.max_nb_chunks,
 | 
					            self.max_nb_chunks,
 | 
				
			||||||
            self.max_memory,
 | 
					            self.max_memory,
 | 
				
			||||||
 | 
					            true,
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if !common_prefix_fst_words.is_empty() {
 | 
					        if !common_prefix_fst_words.is_empty() {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user