Add embedder stats in batches
@@ -1,4 +1,5 @@
 use std::collections::BTreeMap;
+use std::sync::Arc;
 use std::time::Instant;
 
 use deserr::Deserr;
@@ -14,6 +15,7 @@ use super::{
 };
 use crate::error::FaultSource;
 use crate::ThreadPoolNoAbort;
+use crate::progress::EmbedderStats;
 
 // retrying in case of failure
 pub struct Retry {
@@ -168,19 +170,21 @@ impl Embedder {
         &self,
         texts: Vec<String>,
         deadline: Option<Instant>,
+        embedder_stats: Option<Arc<EmbedderStats>>,
     ) -> Result<Vec<Embedding>, EmbedError> {
-        embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions), deadline)
+        embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions), deadline, embedder_stats)
     }
 
     pub fn embed_ref<S>(
         &self,
         texts: &[S],
         deadline: Option<Instant>,
+        embedder_stats: Option<Arc<EmbedderStats>>,
     ) -> Result<Vec<Embedding>, EmbedError>
     where
         S: AsRef<str> + Serialize,
     {
-        embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline)
+        embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline, embedder_stats)
     }
 
     pub fn embed_tokens(
@@ -188,7 +192,7 @@ impl Embedder {
         tokens: &[u32],
         deadline: Option<Instant>,
     ) -> Result<Embedding, EmbedError> {
-        let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;
+        let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline, None)?;
         // unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
         Ok(embeddings.pop().unwrap())
     }
@@ -197,15 +201,16 @@ impl Embedder {
         &self,
         text_chunks: Vec<Vec<String>>,
         threads: &ThreadPoolNoAbort,
+        embedder_stats: Option<Arc<EmbedderStats>>,
     ) -> Result<Vec<Vec<Embedding>>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
-            text_chunks.into_iter().map(move |chunk| self.embed(chunk, None)).collect()
+            text_chunks.into_iter().map(move |chunk| self.embed(chunk, None, embedder_stats.clone())).collect()
         } else {
             threads
                 .install(move || {
-                    text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
+                    text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None, embedder_stats.clone())).collect()
                 })
                 .map_err(|error| EmbedError {
                     kind: EmbedErrorKind::PanicInThreadPool(error),
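Aside (not part of the diff): each chunk above receives embedder_stats.clone(), i.e. a clone of the same Arc, so every parallel job reports into one shared set of counters. A minimal standalone sketch of that sharing pattern, using plain threads and a bare AtomicUsize in place of the real rayon pool and EmbedderStats; all names below are illustrative, not taken from this commit:

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;

fn main() {
    // One shared counter, cloned into every chunk job, mirroring how a single
    // Arc<EmbedderStats> is cloned per chunk in the batch method above.
    let total_requests = Arc::new(AtomicUsize::new(0));
    let chunks = vec![vec!["a", "b"], vec!["c"]];

    let handles: Vec<_> = chunks
        .into_iter()
        .map(|chunk| {
            let stats = Arc::clone(&total_requests);
            std::thread::spawn(move || {
                // pretend each chunk costs exactly one HTTP request
                stats.fetch_add(1, Ordering::Relaxed);
                chunk.len()
            })
        })
        .collect();

    let texts: usize = handles.into_iter().map(|h| h.join().unwrap()).sum();
    println!("requests: {}, texts embedded: {}", total_requests.load(Ordering::Relaxed), texts);
}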
@@ -218,13 +223,14 @@ impl Embedder {
         &self,
         texts: &[&str],
         threads: &ThreadPoolNoAbort,
+        embedder_stats: Option<Arc<EmbedderStats>>
     ) -> Result<Vec<Embedding>, EmbedError> {
         // This condition helps reduce the number of active rayon jobs
         // so that we avoid consuming all the LMDB rtxns and avoid stack overflows.
         if threads.active_operations() >= REQUEST_PARALLELISM {
             let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                 .chunks(self.prompt_count_in_chunk_hint())
-                .map(move |chunk| self.embed_ref(chunk, None))
+                .map(move |chunk| self.embed_ref(chunk, None, embedder_stats.clone()))
                 .collect();
 
             let embeddings = embeddings?;
@@ -234,7 +240,7 @@ impl Embedder {
                 .install(move || {
                     let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
                         .par_chunks(self.prompt_count_in_chunk_hint())
-                        .map(move |chunk| self.embed_ref(chunk, None))
+                        .map(move |chunk| self.embed_ref(chunk, None, embedder_stats.clone()))
                         .collect();
 
                     let embeddings = embeddings?;
@@ -272,7 +278,7 @@ impl Embedder {
     }
 
     fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
-        let v = embed(data, ["test"].as_slice(), 1, None, None)
+        let v = embed(data, ["test"].as_slice(), 1, None, None, None)
             .map_err(NewEmbedderError::could_not_determine_dimension)?;
         // unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
         Ok(v.first().unwrap().len())
@@ -284,6 +290,7 @@ fn embed<S>(
     expected_count: usize,
     expected_dimension: Option<usize>,
     deadline: Option<Instant>,
+    embedder_stats: Option<Arc<EmbedderStats>>,
 ) -> Result<Vec<Embedding>, EmbedError>
 where
     S: Serialize,
@@ -302,6 +309,9 @@ where
     let body = data.request.inject_texts(inputs);
 
     for attempt in 0..10 {
+        if let Some(embedder_stats) = &embedder_stats {
+            embedder_stats.as_ref().total_requests.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        }
         let response = request.clone().send_json(&body);
         let result = check_response(response, data.configuration_source).and_then(|response| {
             response_to_embedding(response, data, expected_count, expected_dimension)
@@ -311,6 +321,12 @@ where
             Ok(response) => return Ok(response),
             Err(retry) => {
                 tracing::warn!("Failed: {}", retry.error);
+                if let Some(embedder_stats) = &embedder_stats {
+                    if let Ok(mut errors) = embedder_stats.errors.write() {
+                        errors.0 = Some(retry.error.to_string());
+                        errors.1 += 1;
+                    }
+                }
                 if let Some(deadline) = deadline {
                     let now = std::time::Instant::now();
                     if now > deadline {
@@ -336,12 +352,26 @@ where
         std::thread::sleep(retry_duration);
     }
 
+    if let Some(embedder_stats) = &embedder_stats {
+        embedder_stats.as_ref().total_requests.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+    }
     let response = request.send_json(&body);
-    let result = check_response(response, data.configuration_source);
-    result.map_err(Retry::into_error).and_then(|response| {
+    let result = check_response(response, data.configuration_source).and_then(|response| {
         response_to_embedding(response, data, expected_count, expected_dimension)
             .map_err(Retry::into_error)
-    })
+    });
+    match result {
+        Ok(response) => Ok(response),
+        Err(retry) => {
+            if let Some(embedder_stats) = &embedder_stats {
+                if let Ok(mut errors) = embedder_stats.errors.write() {
+                    errors.0 = Some(retry.error.to_string());
+                    errors.1 += 1;
+                }
+            }
+            Err(retry.into_error())
+        }
+    }
 }
 
 fn check_response(
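The calls above pin down the surface of the stats type: total_requests is an atomic counter bumped once per HTTP attempt, and errors guards a (last error message, error count) pair updated on every failed attempt. A minimal self-contained sketch compatible with that usage follows; the real EmbedderStats lives in crate::progress and its exact field types may differ.

use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, RwLock};

// Assumed shape, inferred only from how the diff uses the type.
#[derive(Default)]
pub struct EmbedderStats {
    pub total_requests: AtomicUsize,
    pub errors: RwLock<(Option<String>, u32)>,
}

fn main() {
    let stats = Arc::new(EmbedderStats::default());

    // Before each attempt, the retry loop counts one request:
    stats.total_requests.fetch_add(1, Ordering::Relaxed);

    // On a failed attempt, it records the last error and bumps the error count:
    if let Ok(mut errors) = stats.errors.write() {
        errors.0 = Some("example embedding failure".to_string());
        errors.1 += 1;
    }

    println!(
        "requests: {}, errors: {}",
        stats.total_requests.load(Ordering::Relaxed),
        stats.errors.read().unwrap().1
    );
}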