Re-integrate embedder stats

This commit is contained in:
ManyTheFish
2025-06-30 09:46:19 +02:00
parent d35b2d8d33
commit 6db5939f84
17 changed files with 45 additions and 23 deletions

View File

@@ -303,6 +303,7 @@ pub struct SettingsChangeEmbeddingExtractor<'a, 'b> {
old_embedders: &'a EmbeddingConfigs,
embedder_actions: &'a BTreeMap<String, EmbedderAction>,
embedder_category_id: &'a std::collections::HashMap<String, u8>,
embedder_stats: &'a EmbedderStats,
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
@@ -314,6 +315,7 @@ impl<'a, 'b> SettingsChangeEmbeddingExtractor<'a, 'b> {
old_embedders: &'a EmbeddingConfigs,
embedder_actions: &'a BTreeMap<String, EmbedderAction>,
embedder_category_id: &'a std::collections::HashMap<String, u8>,
embedder_stats: &'a EmbedderStats,
sender: EmbeddingSender<'a, 'b>,
field_distribution: &'a FieldDistribution,
threads: &'a ThreadPoolNoAbort,
@@ -324,6 +326,7 @@ impl<'a, 'b> SettingsChangeEmbeddingExtractor<'a, 'b> {
old_embedders,
embedder_actions,
embedder_category_id,
embedder_stats,
sender,
threads,
possible_embedding_mistakes,
@@ -371,6 +374,7 @@ impl<'extractor> SettingsChangeExtractor<'extractor> for SettingsChangeEmbedding
prompt,
context.data,
&self.possible_embedding_mistakes,
self.embedder_stats,
self.threads,
self.sender,
&context.doc_alloc,

View File

@@ -333,6 +333,7 @@ pub(super) fn extract_all_settings_changes<MSP, SD>(
field_distribution: &mut BTreeMap<String, u64>,
mut index_embeddings: Vec<IndexEmbeddingConfig>,
modified_docids: &mut RoaringBitmap,
embedder_stats: &EmbedderStats,
) -> Result<Vec<IndexEmbeddingConfig>>
where
MSP: Fn() -> bool + Sync,
@@ -371,6 +372,7 @@ where
settings_delta.old_embedders(),
settings_delta.embedder_actions(),
settings_delta.new_embedder_category_id(),
embedder_stats,
embedding_sender,
field_distribution,
request_threads(),

View File

@@ -1,6 +1,6 @@
use std::collections::BTreeMap;
use std::sync::atomic::AtomicBool;
use std::sync::{Once, RwLock};
use std::sync::{Arc, Once, RwLock};
use std::thread::{self, Builder};
use big_s::S;
@@ -20,8 +20,8 @@ use super::steps::IndexingStep;
use super::thread_local::ThreadLocal;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::update::settings::SettingsDelta;
use crate::progress::{EmbedderStats, Progress};
use crate::update::settings::SettingsDelta;
use crate::update::GrenadParameters;
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
use crate::vector::{ArroyWrapper, Embedder, EmbeddingConfigs};
@@ -213,6 +213,7 @@ pub fn reindex<'indexer, 'index, MSP, SD>(
settings_delta: &'indexer SD,
must_stop_processing: &'indexer MSP,
progress: &'indexer Progress,
embedder_stats: Arc<EmbedderStats>,
) -> Result<ChannelCongestion>
where
MSP: Fn() -> bool + Sync,
@@ -274,6 +275,7 @@ where
field_distribution,
index_embeddings,
modified_docids,
&embedder_stats,
)
})
.unwrap()