Allow the vector filter to omit the embedder name

This commit is contained in:
Mubelotix
2025-07-07 18:34:24 +02:00
parent 2052537681
commit 9c60e9689f

View File

@ -6,7 +6,7 @@ use crate::vector::{ArroyStats, ArroyWrapper};
use crate::{Index, Result}; use crate::{Index, Result};
pub(super) struct VectorFilter<'a> { pub(super) struct VectorFilter<'a> {
embedder_name: &'a str, embedder_name: Option<&'a str>,
fragment_name: Option<&'a str>, fragment_name: Option<&'a str>,
user_provided: bool, user_provided: bool,
// TODO: not_user_provided: bool, // TODO: not_user_provided: bool,
@ -14,12 +14,14 @@ pub(super) struct VectorFilter<'a> {
impl<'a> VectorFilter<'a> { impl<'a> VectorFilter<'a> {
pub(super) fn matches(value: &str, op: &Condition) -> bool { pub(super) fn matches(value: &str, op: &Condition) -> bool {
matches!(op, Condition::Exists) && value.starts_with("_vectors.") matches!(op, Condition::Exists) && (value.starts_with("_vectors.") || value == "_vectors")
} }
/// Parses a vector filter string. /// Parses a vector filter string.
/// ///
/// Valid formats: /// Valid formats:
/// - `_vectors`
/// - `_vectors.userProvided`
/// - `_vectors.{embedder_name}` /// - `_vectors.{embedder_name}`
/// - `_vectors.{embedder_name}.userProvided` /// - `_vectors.{embedder_name}.userProvided`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}` /// - `_vectors.{embedder_name}.fragments.{fragment_name}`
@ -33,11 +35,7 @@ impl<'a> VectorFilter<'a> {
)))); ))));
} }
let embedder_name = split.next().ok_or_else(|| { let embedder_name = split.next();
Error::UserError(UserError::InvalidFilter(String::from(
"Vector filter must contain an embedder name",
)))
})?;
let mut fragment_name = None; let mut fragment_name = None;
if split.peek() == Some(&"fragments") { if split.peek() == Some(&"fragments") {
@ -74,44 +72,63 @@ impl<'a> VectorFilter<'a> {
let index_embedding_configs = index.embedding_configs(); let index_embedding_configs = index.embedding_configs();
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?; let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
let Some(embedder_config) = let mut embedders = Vec::new();
embedding_configs.iter().find(|config| config.name == self.embedder_name) if let Some(embedder_name) = self.embedder_name {
else { let Some(embedder_config) =
return Ok(RoaringBitmap::new()); embedding_configs.iter().find(|config| config.name == embedder_name)
};
let Some(embedder_info) =
index_embedding_configs.embedder_info(rtxn, self.embedder_name)?
else {
return Ok(RoaringBitmap::new());
};
let arroy_wrapper = ArroyWrapper::new(
index.vector_arroy,
embedder_info.embedder_id,
embedder_config.config.quantized(),
);
let mut docids = if let Some(fragment_name) = self.fragment_name {
let Some(fragment_config) = embedder_config
.fragments
.as_slice()
.iter()
.find(|fragment| fragment.name == fragment_name)
else { else {
return Ok(RoaringBitmap::new()); return Ok(RoaringBitmap::new());
}; };
let Some(embedder_info) =
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? index_embedding_configs.embedder_info(rtxn, embedder_name)?
else {
return Ok(RoaringBitmap::new());
};
embedders.push((embedder_config, embedder_info));
} else { } else {
let mut stats = ArroyStats::default(); for embedder_config in embedding_configs.iter() {
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; let Some(embedder_info) =
stats.documents index_embedding_configs.embedder_info(rtxn, &embedder_config.name)?
else {
continue;
};
embedders.push((embedder_config, embedder_info));
}
}; };
let mut docids = RoaringBitmap::new();
for (embedder_config, embedder_info) in embedders {
let arroy_wrapper = ArroyWrapper::new(
index.vector_arroy,
embedder_info.embedder_id,
embedder_config.config.quantized(),
);
// FIXME: performance let mut new_docids = if let Some(fragment_name) = self.fragment_name {
if self.user_provided { let Some(fragment_config) = embedder_config
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); .fragments
docids &= user_provided_docsids; .as_slice()
.iter()
.find(|fragment| fragment.name == fragment_name)
else {
return Ok(RoaringBitmap::new());
};
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
} else {
let mut stats = ArroyStats::default();
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
stats.documents
};
// FIXME: performance
if self.user_provided {
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
new_docids &= user_provided_docsids;
}
docids |= new_docids;
} }
if let Some(universe) = universe { if let Some(universe) = universe {