mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-07 05:06:31 +00:00
Move embedder stats out of progress
This commit is contained in:
@ -1,4 +1,5 @@
|
||||
use std::cell::RefCell;
|
||||
use std::f32::consts::E;
|
||||
use std::{cell::RefCell, sync::Arc};
|
||||
|
||||
use bumpalo::collections::Vec as BVec;
|
||||
use bumpalo::Bump;
|
||||
@ -6,6 +7,7 @@ use hashbrown::{DefaultHashBuilder, HashMap};
|
||||
|
||||
use super::cache::DelAddRoaringBitmap;
|
||||
use crate::error::FaultSource;
|
||||
use crate::progress::EmbedderStats;
|
||||
use crate::prompt::Prompt;
|
||||
use crate::update::new::channel::EmbeddingSender;
|
||||
use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
|
||||
@ -22,6 +24,7 @@ pub struct EmbeddingExtractor<'a, 'b> {
|
||||
embedders: &'a EmbeddingConfigs,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
possible_embedding_mistakes: PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
}
|
||||
|
||||
@ -30,10 +33,11 @@ impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
|
||||
embedders: &'a EmbeddingConfigs,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
field_distribution: &'a FieldDistribution,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
) -> Self {
|
||||
let possible_embedding_mistakes = PossibleEmbeddingMistakes::new(field_distribution);
|
||||
Self { embedders, sender, threads, possible_embedding_mistakes }
|
||||
Self { embedders, sender, threads, possible_embedding_mistakes, embedder_stats }
|
||||
}
|
||||
}
|
||||
|
||||
@ -75,6 +79,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
|
||||
prompt,
|
||||
context.data,
|
||||
&self.possible_embedding_mistakes,
|
||||
self.embedder_stats.clone(),
|
||||
self.threads,
|
||||
self.sender,
|
||||
&context.doc_alloc,
|
||||
@ -307,6 +312,7 @@ struct Chunks<'a, 'b, 'extractor> {
|
||||
dimensions: usize,
|
||||
prompt: &'a Prompt,
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
@ -322,6 +328,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
prompt: &'a Prompt,
|
||||
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
|
||||
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
threads: &'a ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
doc_alloc: &'a Bump,
|
||||
@ -336,6 +343,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
embedder,
|
||||
prompt,
|
||||
possible_embedding_mistakes,
|
||||
embedder_stats,
|
||||
threads,
|
||||
sender,
|
||||
embedder_id,
|
||||
@ -371,6 +379,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
self.embedder_id,
|
||||
self.embedder_name,
|
||||
self.possible_embedding_mistakes,
|
||||
self.embedder_stats.clone(),
|
||||
unused_vectors_distribution,
|
||||
self.threads,
|
||||
self.sender,
|
||||
@ -389,6 +398,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
self.embedder_id,
|
||||
self.embedder_name,
|
||||
self.possible_embedding_mistakes,
|
||||
self.embedder_stats.clone(),
|
||||
unused_vectors_distribution,
|
||||
self.threads,
|
||||
self.sender,
|
||||
@ -407,6 +417,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
embedder_id: u8,
|
||||
embedder_name: &str,
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistributionBump,
|
||||
threads: &ThreadPoolNoAbort,
|
||||
sender: EmbeddingSender<'a, 'b>,
|
||||
@ -450,7 +461,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
|
||||
return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)));
|
||||
}
|
||||
|
||||
let res = match embedder.embed_index_ref(texts.as_slice(), threads, None) {
|
||||
let res = match embedder.embed_index_ref(texts.as_slice(), threads, embedder_stats) {
|
||||
Ok(embeddings) => {
|
||||
for (docid, embedding) in ids.into_iter().zip(embeddings) {
|
||||
sender.set_vector(*docid, embedder_id, embedding).unwrap();
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::OnceLock;
|
||||
use std::sync::Arc;
|
||||
|
||||
use bumpalo::Bump;
|
||||
use roaring::RoaringBitmap;
|
||||
@ -13,6 +14,7 @@ use super::super::thread_local::{FullySend, ThreadLocal};
|
||||
use super::super::FacetFieldIdsDelta;
|
||||
use super::document_changes::{extract, DocumentChanges, IndexingContext};
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::EmbedderStats;
|
||||
use crate::progress::MergingWordCache;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::new::extract::EmbeddingExtractor;
|
||||
@ -34,6 +36,7 @@ pub(super) fn extract_all<'pl, 'extractor, DC, MSP>(
|
||||
mut index_embeddings: Vec<IndexEmbeddingConfig>,
|
||||
document_ids: &mut RoaringBitmap,
|
||||
modified_docids: &mut RoaringBitmap,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<(FacetFieldIdsDelta, Vec<IndexEmbeddingConfig>)>
|
||||
where
|
||||
DC: DocumentChanges<'pl>,
|
||||
@ -245,6 +248,7 @@ where
|
||||
embedders,
|
||||
embedding_sender,
|
||||
field_distribution,
|
||||
Some(embedder_stats),
|
||||
request_threads(),
|
||||
);
|
||||
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::{Once, RwLock};
|
||||
use std::thread::{self, Builder};
|
||||
use std::sync::Arc;
|
||||
|
||||
use big_s::S;
|
||||
use document_changes::{DocumentChanges, IndexingContext};
|
||||
@ -19,7 +20,7 @@ use super::steps::IndexingStep;
|
||||
use super::thread_local::ThreadLocal;
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||
use crate::progress::Progress;
|
||||
use crate::progress::{EmbedderStats, Progress};
|
||||
use crate::update::GrenadParameters;
|
||||
use crate::vector::{ArroyWrapper, EmbeddingConfigs};
|
||||
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort};
|
||||
@ -55,6 +56,7 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP>(
|
||||
embedders: EmbeddingConfigs,
|
||||
must_stop_processing: &'indexer MSP,
|
||||
progress: &'indexer Progress,
|
||||
embedder_stats: Arc<EmbedderStats>,
|
||||
) -> Result<ChannelCongestion>
|
||||
where
|
||||
DC: DocumentChanges<'pl>,
|
||||
@ -158,6 +160,7 @@ where
|
||||
index_embeddings,
|
||||
document_ids,
|
||||
modified_docids,
|
||||
embedder_stats,
|
||||
)
|
||||
})
|
||||
.unwrap()
|
||||
|
Reference in New Issue
Block a user