Introduce new CLI arguments to deactivate experimental post processing

This commit is contained in:
Clément Renault
2025-10-02 12:06:33 +02:00
parent 5f18a9b2ee
commit 9f79ce82af
9 changed files with 93 additions and 20 deletions

View File

@@ -101,6 +101,8 @@ pub struct GrenadParameters {
pub chunk_compression_level: Option<u32>,
pub max_memory: Option<usize>,
pub max_nb_chunks: Option<usize>,
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
pub experimental_no_edition_2024_for_facet_post_processing: bool,
}
impl Default for GrenadParameters {
@@ -110,6 +112,8 @@ impl Default for GrenadParameters {
chunk_compression_level: None,
max_memory: None,
max_nb_chunks: None,
experimental_no_edition_2024_for_prefix_post_processing: false,
experimental_no_edition_2024_for_facet_post_processing: false,
}
}
}

View File

@@ -254,6 +254,12 @@ where
chunk_compression_level: self.indexer_config.chunk_compression_level,
max_memory: self.indexer_config.max_memory,
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
experimental_no_edition_2024_for_prefix_post_processing: self
.indexer_config
.experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing: self
.indexer_config
.experimental_no_edition_2024_for_facet_post_processing,
};
let documents_chunk_size = match self.indexer_config.documents_chunk_size {
Some(chunk_size) => chunk_size,

View File

@@ -983,6 +983,12 @@ impl<'a, 'i> Transform<'a, 'i> {
chunk_compression_level: self.indexer_settings.chunk_compression_level,
max_memory: self.indexer_settings.max_memory,
max_nb_chunks: self.indexer_settings.max_nb_chunks, // default value, may be chosen.
experimental_no_edition_2024_for_prefix_post_processing: self
.indexer_settings
.experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing: self
.indexer_settings
.experimental_no_edition_2024_for_facet_post_processing,
};
// Once we have written all the documents, we merge everything into a Reader.

View File

@@ -18,6 +18,8 @@ pub struct IndexerConfig {
pub skip_index_budget: bool,
pub experimental_no_edition_2024_for_settings: bool,
pub experimental_no_edition_2024_for_dumps: bool,
pub experimental_no_edition_2024_for_prefix_post_processing: bool,
pub experimental_no_edition_2024_for_facet_post_processing: bool,
}
impl IndexerConfig {
@@ -27,6 +29,10 @@ impl IndexerConfig {
chunk_compression_level: self.chunk_compression_level,
max_memory: self.max_memory,
max_nb_chunks: self.max_nb_chunks,
experimental_no_edition_2024_for_prefix_post_processing: self
.experimental_no_edition_2024_for_prefix_post_processing,
experimental_no_edition_2024_for_facet_post_processing: self
.experimental_no_edition_2024_for_facet_post_processing,
}
}
}
@@ -68,6 +74,8 @@ impl Default for IndexerConfig {
skip_index_budget: false,
experimental_no_edition_2024_for_settings: false,
experimental_no_edition_2024_for_dumps: false,
experimental_no_edition_2024_for_prefix_post_processing: false,
experimental_no_edition_2024_for_facet_post_processing: false,
}
}
}

View File

@@ -42,6 +42,7 @@ where
wtxn,
facet_field_ids_delta,
&mut global_fields_ids_map,
indexing_context.grenad_parameters,
indexing_context.progress,
)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map, indexing_context.progress)?;
@@ -219,6 +220,7 @@ fn compute_facet_level_database(
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &mut GlobalFieldsIdsMap,
grenad_parameters: &GrenadParameters,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;
@@ -242,8 +244,14 @@ fn compute_facet_level_database(
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::StringsBulk);
tracing::debug!(%fid, "bulk string facet processing in parallel");
generate_facet_levels(index, wtxn, fid, FacetType::String)?
if grenad_parameters.experimental_no_edition_2024_for_facet_post_processing {
tracing::debug!(%fid, "bulk string facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
.execute(wtxn)?
} else {
tracing::debug!(%fid, "bulk string facet processing in parallel");
generate_facet_levels(index, wtxn, fid, FacetType::String)?
}
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::StringsIncremental);

View File

@@ -157,7 +157,8 @@ struct WordPrefixIntegerDocids<'i> {
database: Database<Bytes, CboRoaringBitmapCodec>,
prefix_database: Database<Bytes, CboRoaringBitmapCodec>,
max_memory_by_thread: Option<usize>,
read_uncommitted_in_parallel: bool,
/// When `true`, do not use the experimental LMDB feature that reads uncommitted data in parallel.
no_experimental_post_processing: bool,
}
impl<'i> WordPrefixIntegerDocids<'i> {
@@ -172,15 +173,11 @@ impl<'i> WordPrefixIntegerDocids<'i> {
database,
prefix_database,
max_memory_by_thread: grenad_parameters.max_memory_by_thread(),
read_uncommitted_in_parallel: false,
no_experimental_post_processing: grenad_parameters
.experimental_no_edition_2024_for_prefix_post_processing,
}
}
/// Use an experimental LMDB feature to read uncommitted data in parallel.
fn read_uncommitted_in_parallel(&mut self, value: bool) {
self.read_uncommitted_in_parallel = value;
}
fn execute(
self,
wtxn: &mut heed::RwTxn,
@@ -188,10 +185,10 @@ impl<'i> WordPrefixIntegerDocids<'i> {
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
if self.read_uncommitted_in_parallel {
self.recompute_modified_prefixes_no_frozen(wtxn, prefix_to_compute)
} else {
if self.no_experimental_post_processing {
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
} else {
self.recompute_modified_prefixes_no_frozen(wtxn, prefix_to_compute)
}
}
@@ -510,14 +507,13 @@ pub fn compute_word_prefix_fid_docids(
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
) -> Result<()> {
let mut builder = WordPrefixIntegerDocids::new(
WordPrefixIntegerDocids::new(
index,
index.word_fid_docids.remap_key_type(),
index.word_prefix_fid_docids.remap_key_type(),
grenad_parameters,
);
builder.read_uncommitted_in_parallel(true);
builder.execute(wtxn, prefix_to_compute, prefix_to_delete)
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
@@ -528,12 +524,11 @@ pub fn compute_word_prefix_position_docids(
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: &GrenadParameters,
) -> Result<()> {
let mut builder = WordPrefixIntegerDocids::new(
WordPrefixIntegerDocids::new(
index,
index.word_position_docids.remap_key_type(),
index.word_prefix_position_docids.remap_key_type(),
grenad_parameters,
);
builder.read_uncommitted_in_parallel(true);
builder.execute(wtxn, prefix_to_compute, prefix_to_delete)
)
.execute(wtxn, prefix_to_compute, prefix_to_delete)
}