Add deadline of 3 seconds to embedding requests made in the context of hybrid search

This commit is contained in:
Louis Dureuil
2024-11-06 09:24:51 +01:00
parent a05e448cf8
commit e9d17136b2
7 changed files with 80 additions and 31 deletions

View File

@@ -1,4 +1,5 @@
use std::collections::BTreeMap;
use std::time::Instant;
use deserr::Deserr;
use rand::Rng;
@@ -153,19 +154,31 @@ impl Embedder {
Ok(Self { data, dimensions, distribution: options.distribution })
}
pub fn embed(&self, texts: Vec<String>) -> Result<Vec<Embedding>, EmbedError> {
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions))
pub fn embed(
&self,
texts: Vec<String>,
deadline: Option<Instant>,
) -> Result<Vec<Embedding>, EmbedError> {
embed(&self.data, texts.as_slice(), texts.len(), Some(self.dimensions), deadline)
}
pub fn embed_ref<S>(&self, texts: &[S]) -> Result<Vec<Embedding>, EmbedError>
pub fn embed_ref<S>(
&self,
texts: &[S],
deadline: Option<Instant>,
) -> Result<Vec<Embedding>, EmbedError>
where
S: AsRef<str> + Serialize,
{
embed(&self.data, texts, texts.len(), Some(self.dimensions))
embed(&self.data, texts, texts.len(), Some(self.dimensions), deadline)
}
pub fn embed_tokens(&self, tokens: &[usize]) -> Result<Embedding, EmbedError> {
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions))?;
pub fn embed_tokens(
&self,
tokens: &[usize],
deadline: Option<Instant>,
) -> Result<Embedding, EmbedError> {
let mut embeddings = embed(&self.data, tokens, 1, Some(self.dimensions), deadline)?;
// unwrap: guaranteed that embeddings.len() == 1, otherwise the previous line terminated in error
Ok(embeddings.pop().unwrap())
}
@@ -177,7 +190,7 @@ impl Embedder {
) -> Result<Vec<Vec<Embedding>>, EmbedError> {
threads
.install(move || {
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk)).collect()
text_chunks.into_par_iter().map(move |chunk| self.embed(chunk, None)).collect()
})
.map_err(|error| EmbedError {
kind: EmbedErrorKind::PanicInThreadPool(error),
@@ -194,7 +207,7 @@ impl Embedder {
.install(move || {
let embeddings: Result<Vec<Vec<Embedding>>, _> = texts
.par_chunks(self.prompt_count_in_chunk_hint())
.map(move |chunk| self.embed_ref(chunk))
.map(move |chunk| self.embed_ref(chunk, None))
.collect();
let embeddings = embeddings?;
@@ -227,7 +240,7 @@ impl Embedder {
}
fn infer_dimensions(data: &EmbedderData) -> Result<usize, NewEmbedderError> {
let v = embed(data, ["test"].as_slice(), 1, None)
let v = embed(data, ["test"].as_slice(), 1, None, None)
.map_err(NewEmbedderError::could_not_determine_dimension)?;
// unwrap: guaranteed that v.len() == 1, otherwise the previous line terminated in error
Ok(v.first().unwrap().len())
@@ -238,6 +251,7 @@ fn embed<S>(
inputs: &[S],
expected_count: usize,
expected_dimension: Option<usize>,
deadline: Option<Instant>,
) -> Result<Vec<Embedding>, EmbedError>
where
S: Serialize,
@@ -265,7 +279,18 @@ where
Ok(response) => return Ok(response),
Err(retry) => {
tracing::warn!("Failed: {}", retry.error);
retry.into_duration(attempt)
if let Some(deadline) = deadline {
let now = std::time::Instant::now();
if now > deadline {
tracing::warn!("Could not embed due to deadline");
return Err(retry.into_error());
}
let duration_to_deadline = deadline - now;
retry.into_duration(attempt).map(|duration| duration.min(duration_to_deadline))
} else {
retry.into_duration(attempt)
}
}
}?;