Support not specifying an embedder in the vector filter

This commit is contained in:
Mubelotix
2025-07-07 18:34:24 +02:00
parent 2052537681
commit 9c60e9689f

View File

@ -6,7 +6,7 @@ use crate::vector::{ArroyStats, ArroyWrapper};
use crate::{Index, Result};
pub(super) struct VectorFilter<'a> {
embedder_name: &'a str,
embedder_name: Option<&'a str>,
fragment_name: Option<&'a str>,
user_provided: bool,
// TODO: not_user_provided: bool,
@ -14,12 +14,14 @@ pub(super) struct VectorFilter<'a> {
impl<'a> VectorFilter<'a> {
pub(super) fn matches(value: &str, op: &Condition) -> bool {
matches!(op, Condition::Exists) && value.starts_with("_vectors.")
matches!(op, Condition::Exists) && (value.starts_with("_vectors.") || value == "_vectors")
}
/// Parses a vector filter string.
///
/// Valid formats:
/// - `_vectors`
/// - `_vectors.userProvided`
/// - `_vectors.{embedder_name}`
/// - `_vectors.{embedder_name}.userProvided`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
@ -33,11 +35,7 @@ impl<'a> VectorFilter<'a> {
))));
}
let embedder_name = split.next().ok_or_else(|| {
Error::UserError(UserError::InvalidFilter(String::from(
"Vector filter must contain an embedder name",
)))
})?;
let embedder_name = split.next();
let mut fragment_name = None;
if split.peek() == Some(&"fragments") {
@ -74,44 +72,63 @@ impl<'a> VectorFilter<'a> {
let index_embedding_configs = index.embedding_configs();
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
let Some(embedder_config) =
embedding_configs.iter().find(|config| config.name == self.embedder_name)
else {
return Ok(RoaringBitmap::new());
};
let Some(embedder_info) =
index_embedding_configs.embedder_info(rtxn, self.embedder_name)?
else {
return Ok(RoaringBitmap::new());
};
let arroy_wrapper = ArroyWrapper::new(
index.vector_arroy,
embedder_info.embedder_id,
embedder_config.config.quantized(),
);
let mut docids = if let Some(fragment_name) = self.fragment_name {
let Some(fragment_config) = embedder_config
.fragments
.as_slice()
.iter()
.find(|fragment| fragment.name == fragment_name)
let mut embedders = Vec::new();
if let Some(embedder_name) = self.embedder_name {
let Some(embedder_config) =
embedding_configs.iter().find(|config| config.name == embedder_name)
else {
return Ok(RoaringBitmap::new());
};
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
let Some(embedder_info) =
index_embedding_configs.embedder_info(rtxn, embedder_name)?
else {
return Ok(RoaringBitmap::new());
};
embedders.push((embedder_config, embedder_info));
} else {
let mut stats = ArroyStats::default();
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
stats.documents
for embedder_config in embedding_configs.iter() {
let Some(embedder_info) =
index_embedding_configs.embedder_info(rtxn, &embedder_config.name)?
else {
continue;
};
embedders.push((embedder_config, embedder_info));
}
};
let mut docids = RoaringBitmap::new();
for (embedder_config, embedder_info) in embedders {
let arroy_wrapper = ArroyWrapper::new(
index.vector_arroy,
embedder_info.embedder_id,
embedder_config.config.quantized(),
);
// FIXME: performance
if self.user_provided {
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
docids &= user_provided_docsids;
let mut new_docids = if let Some(fragment_name) = self.fragment_name {
let Some(fragment_config) = embedder_config
.fragments
.as_slice()
.iter()
.find(|fragment| fragment.name == fragment_name)
else {
return Ok(RoaringBitmap::new());
};
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
} else {
let mut stats = ArroyStats::default();
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
stats.documents
};
// FIXME: performance
if self.user_provided {
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
new_docids &= user_provided_docsids;
}
docids |= new_docids;
}
if let Some(universe) = universe {