Move embedder stats out of progress

This commit is contained in:
Mubelotix
2025-06-23 15:24:14 +02:00
parent 4cadc8113b
commit 4925b30196
30 changed files with 255 additions and 69 deletions

View File

@ -687,6 +687,8 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
unused_vectors_distribution: &UnusedVectorsDistribution,
request_threads: &ThreadPoolNoAbort,
) -> Result<grenad::Reader<BufReader<File>>> {
println!("Extract embedder stats {}:", embedder_stats.is_some());
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
let n_vectors_per_chunk = embedder.prompt_count_in_chunk_hint(); // number of vectors in a single chunk

View File

@ -50,7 +50,7 @@ pub(crate) fn data_from_obkv_documents(
settings_diff: Arc<InnerIndexSettingsDiff>,
max_positions_per_attributes: Option<u32>,
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
embedder_stats: Option<Arc<EmbedderStats>>,
embedder_stats: Arc<EmbedderStats>,
) -> Result<()> {
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|| {
@ -234,7 +234,7 @@ fn send_original_documents_data(
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
settings_diff: Arc<InnerIndexSettingsDiff>,
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
embedder_stats: Option<Arc<EmbedderStats>>,
embedder_stats: Arc<EmbedderStats>,
) -> Result<()> {
let original_documents_chunk =
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
@ -274,7 +274,7 @@ fn send_original_documents_data(
embedder.clone(),
&embedder_name,
&possible_embedding_mistakes,
embedder_stats.clone(),
Some(embedder_stats.clone()),
&unused_vectors_distribution,
request_threads(),
) {

View File

@ -81,7 +81,7 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
added_documents: u64,
deleted_documents: u64,
embedders: EmbeddingConfigs,
embedder_stats: Option<Arc<EmbedderStats>>,
embedder_stats: Arc<EmbedderStats>,
}
#[derive(Default, Debug, Clone)]
@ -104,7 +104,7 @@ where
config: IndexDocumentsConfig,
progress: FP,
should_abort: FA,
embedder_stats: Option<Arc<EmbedderStats>>,
embedder_stats: Arc<EmbedderStats>,
) -> Result<IndexDocuments<'t, 'i, 'a, FP, FA>> {
let transform = Some(Transform::new(
wtxn,
@ -2030,6 +2030,7 @@ mod tests {
EmbeddingConfigs::default(),
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2117,6 +2118,7 @@ mod tests {
EmbeddingConfigs::default(),
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2302,6 +2304,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2364,6 +2367,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2417,6 +2421,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2469,6 +2474,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2523,6 +2529,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2582,6 +2589,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2634,6 +2642,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2686,6 +2695,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2884,6 +2894,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2943,6 +2954,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2999,6 +3011,7 @@ mod tests {
embedders,
&|| false,
&Progress::default(),
Default::default(),
)
.unwrap();
wtxn.commit().unwrap();