Implement for multi-search

This commit is contained in:
Mubelotix
2025-07-25 11:45:51 +02:00
parent 26da478b5b
commit a7fe2abca4
6 changed files with 318 additions and 26 deletions

View File

@ -230,7 +230,14 @@ impl Search<'_> {
}
// no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder, quantized, media }) = semantic
let Some(SemanticSearch {
vector,
mut auto_embedded,
embedder_name,
embedder,
quantized,
media,
}) = semantic
else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
@ -253,7 +260,10 @@ impl Search<'_> {
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3);
match embedder.embed_search(query, Some(deadline)) {
Ok(embedding) => embedding,
Ok(embedding) => {
auto_embedded = true;
embedding
}
Err(error) => {
tracing::error!(error=%error, "Embedding failed");
return Ok(return_keyword_results(
@ -268,6 +278,7 @@ impl Search<'_> {
search.semantic = Some(SemanticSearch {
vector: Some(vector_query.clone()),
auto_embedded,
embedder_name,
embedder,
quantized,
@ -280,7 +291,7 @@ impl Search<'_> {
let keyword_results = ScoreWithRatioResult::new(keyword_results, 1.0 - semantic_ratio);
let vector_results = ScoreWithRatioResult::new(vector_results, semantic_ratio);
let (mut merge_results, semantic_hit_count) = ScoreWithRatioResult::merge(
let (merge_results, semantic_hit_count) = ScoreWithRatioResult::merge(
vector_results,
keyword_results,
self.offset,
@ -289,7 +300,6 @@ impl Search<'_> {
search.index,
search.rtxn,
)?;
merge_results.query_vector = Some(vector_query);
assert!(merge_results.documents_ids.len() <= self.limit);
Ok((merge_results, Some(semantic_hit_count)))
}

View File

@ -32,6 +32,7 @@ pub mod similar;
#[derive(Debug, Clone)]
pub struct SemanticSearch {
vector: Option<Vec<f32>>,
auto_embedded: bool,
media: Option<serde_json::Value>,
embedder_name: String,
embedder: Arc<Embedder>,
@ -97,7 +98,33 @@ impl<'a> Search<'a> {
vector: Option<Embedding>,
media: Option<serde_json::Value>,
) -> &mut Search<'a> {
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector, media });
self.semantic = Some(SemanticSearch {
embedder_name,
auto_embedded: false,
embedder,
quantized,
vector,
media,
});
self
}
/// Configures the semantic part of this search, flagging the vector as
/// auto-embedded.
///
/// Stores a new `SemanticSearch` built from the arguments, with its
/// `auto_embedded` flag set to `true`, replacing any semantic configuration
/// previously stored on `self`.
///
/// NOTE(review): presumably meant for vectors that were produced by the
/// embedder rather than supplied directly by the caller — confirm against
/// the call sites.
///
/// Returns `self` so that calls can be chained.
pub fn semantic_auto_embedded(
    &mut self,
    embedder_name: String,
    embedder: Arc<Embedder>,
    quantized: bool,
    vector: Option<Embedding>,
    media: Option<serde_json::Value>,
) -> &mut Search<'a> {
    // Name the flag so the struct literal can use field-init shorthand throughout.
    let auto_embedded = true;
    let configuration =
        SemanticSearch { vector, auto_embedded, media, embedder_name, embedder, quantized };
    self.semantic = Some(configuration);
    self
}
@ -225,6 +252,7 @@ impl<'a> Search<'a> {
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let mut query_vector = None;
let PartialSearchResult {
located_query_terms,
candidates,
@ -235,26 +263,32 @@ impl<'a> Search<'a> {
} = match self.semantic.as_ref() {
Some(SemanticSearch {
vector: Some(vector),
auto_embedded,
embedder_name,
embedder,
quantized,
media: _,
}) => execute_vector_search(
&mut ctx,
vector,
self.scoring_strategy,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_param,
self.offset,
self.limit,
embedder_name,
embedder,
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
)?,
}) => {
if *auto_embedded {
query_vector = Some(vector.clone());
}
execute_vector_search(
&mut ctx,
vector,
self.scoring_strategy,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_param,
self.offset,
self.limit,
embedder_name,
embedder,
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
)?
}
_ => execute_search(
&mut ctx,
self.query.as_deref(),
@ -295,7 +329,7 @@ impl<'a> Search<'a> {
documents_ids,
degraded,
used_negative_operator,
query_vector: None,
query_vector,
})
}
}