mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-10-02 17:56:28 +00:00
Add embedder stats in batches
This commit is contained in:
@@ -17,6 +17,7 @@ use crate::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::error::FaultSource;
|
||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::EmbedderStats;
|
||||
use crate::prompt::Prompt;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
@@ -682,6 +683,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
embedder: Arc<Embedder>,
|
||||
embedder_name: &str,
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||
request_threads: &ThreadPoolNoAbort,
|
||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||
@@ -724,6 +726,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)),
|
||||
embedder_name,
|
||||
possible_embedding_mistakes,
|
||||
embedder_stats.clone(),
|
||||
unused_vectors_distribution,
|
||||
request_threads,
|
||||
)?;
|
||||
@@ -746,6 +749,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
std::mem::take(&mut chunks),
|
||||
embedder_name,
|
||||
possible_embedding_mistakes,
|
||||
embedder_stats.clone(),
|
||||
unused_vectors_distribution,
|
||||
request_threads,
|
||||
)?;
|
||||
@@ -764,6 +768,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
||||
vec![std::mem::take(&mut current_chunk)],
|
||||
embedder_name,
|
||||
possible_embedding_mistakes,
|
||||
embedder_stats,
|
||||
unused_vectors_distribution,
|
||||
request_threads,
|
||||
)?;
|
||||
@@ -783,10 +788,11 @@ fn embed_chunks(
|
||||
text_chunks: Vec<Vec<String>>,
|
||||
embedder_name: &str,
|
||||
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||
request_threads: &ThreadPoolNoAbort,
|
||||
) -> Result<Vec<Vec<Embedding>>> {
|
||||
- match embedder.embed_index(text_chunks, request_threads) {
|
||||
+ match embedder.embed_index(text_chunks, request_threads, embedder_stats) {
|
||||
Ok(chunks) => Ok(chunks),
|
||||
Err(error) => {
|
||||
if let FaultSource::Bug = error.fault {
|
||||
|
@@ -31,6 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
|
||||
use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
|
||||
use super::{helpers, TypedChunk};
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::progress::EmbedderStats;
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::error::PossibleEmbeddingMistakes;
|
||||
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||
@@ -49,6 +50,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
) -> Result<()> {
|
||||
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
||||
|| {
|
||||
@@ -62,6 +64,7 @@ pub(crate) fn data_from_obkv_documents(
|
||||
embedders_configs.clone(),
|
||||
settings_diff.clone(),
|
||||
possible_embedding_mistakes.clone(),
|
||||
embedder_stats.clone(),
|
||||
)
|
||||
})
|
||||
.collect::<Result<()>>()
|
||||
@@ -231,6 +234,7 @@ fn send_original_documents_data(
|
||||
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
||||
embedder_stats: Option<Arc<EmbedderStats>>,
|
||||
) -> Result<()> {
|
||||
let original_documents_chunk =
|
||||
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
||||
@@ -270,6 +274,7 @@ fn send_original_documents_data(
|
||||
embedder.clone(),
|
||||
&embedder_name,
|
||||
&possible_embedding_mistakes,
|
||||
embedder_stats.clone(),
|
||||
&unused_vectors_distribution,
|
||||
request_threads(),
|
||||
) {
|
||||
|
Reference in New Issue
Block a user