From 9c60e9689f806442abd9e9ebe40f98ba17c65db9 Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Mon, 7 Jul 2025 18:34:24 +0200 Subject: [PATCH] Support not specifying an embedder in the vector filter --- .../milli/src/search/facet/filter_vector.rs | 95 +++++++++++-------- 1 file changed, 56 insertions(+), 39 deletions(-) diff --git a/crates/milli/src/search/facet/filter_vector.rs b/crates/milli/src/search/facet/filter_vector.rs index 701ab561c..ee1c19923 100644 --- a/crates/milli/src/search/facet/filter_vector.rs +++ b/crates/milli/src/search/facet/filter_vector.rs @@ -6,7 +6,7 @@ use crate::vector::{ArroyStats, ArroyWrapper}; use crate::{Index, Result}; pub(super) struct VectorFilter<'a> { - embedder_name: &'a str, + embedder_name: Option<&'a str>, fragment_name: Option<&'a str>, user_provided: bool, // TODO: not_user_provided: bool, @@ -14,12 +14,14 @@ pub(super) struct VectorFilter<'a> { impl<'a> VectorFilter<'a> { pub(super) fn matches(value: &str, op: &Condition) -> bool { - matches!(op, Condition::Exists) && value.starts_with("_vectors.") + matches!(op, Condition::Exists) && (value.starts_with("_vectors.") || value == "_vectors") } /// Parses a vector filter string. /// /// Valid formats: + /// - `_vectors` + /// - `_vectors.userProvided` /// - `_vectors.{embedder_name}` /// - `_vectors.{embedder_name}.userProvided` /// - `_vectors.{embedder_name}.fragments.{fragment_name}` @@ -33,11 +35,7 @@ impl<'a> VectorFilter<'a> { )))); } - let embedder_name = split.next().ok_or_else(|| { - Error::UserError(UserError::InvalidFilter(String::from( - "Vector filter must contain an embedder name", - ))) - })?; + let embedder_name = split.next(); let mut fragment_name = None; if split.peek() == Some(&"fragments") { @@ -74,44 +72,63 @@ impl<'a> VectorFilter<'a> { let index_embedding_configs = index.embedding_configs(); let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?; - let Some(embedder_config) = - embedding_configs.iter().find(|config| config.name == self.embedder_name) - else { - return Ok(RoaringBitmap::new()); - }; - let Some(embedder_info) = - index_embedding_configs.embedder_info(rtxn, self.embedder_name)? - else { - return Ok(RoaringBitmap::new()); - }; - - let arroy_wrapper = ArroyWrapper::new( - index.vector_arroy, - embedder_info.embedder_id, - embedder_config.config.quantized(), - ); - - let mut docids = if let Some(fragment_name) = self.fragment_name { - let Some(fragment_config) = embedder_config - .fragments - .as_slice() - .iter() - .find(|fragment| fragment.name == fragment_name) + let mut embedders = Vec::new(); + if let Some(embedder_name) = self.embedder_name { + let Some(embedder_config) = + embedding_configs.iter().find(|config| config.name == embedder_name) else { return Ok(RoaringBitmap::new()); }; - - arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? + let Some(embedder_info) = + index_embedding_configs.embedder_info(rtxn, embedder_name)? + else { + return Ok(RoaringBitmap::new()); + }; + + embedders.push((embedder_config, embedder_info)); } else { - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; - stats.documents + for embedder_config in embedding_configs.iter() { + let Some(embedder_info) = + index_embedding_configs.embedder_info(rtxn, &embedder_config.name)? + else { + continue; + }; + embedders.push((embedder_config, embedder_info)); + } }; + + let mut docids = RoaringBitmap::new(); + for (embedder_config, embedder_info) in embedders { + let arroy_wrapper = ArroyWrapper::new( + index.vector_arroy, + embedder_info.embedder_id, + embedder_config.config.quantized(), + ); - // FIXME: performance - if self.user_provided { - let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); - docids &= user_provided_docsids; + let mut new_docids = if let Some(fragment_name) = self.fragment_name { + let Some(fragment_config) = embedder_config + .fragments + .as_slice() + .iter() + .find(|fragment| fragment.name == fragment_name) + else { + return Ok(RoaringBitmap::new()); + }; + + arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? + } else { + let mut stats = ArroyStats::default(); + arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + stats.documents + }; + + // FIXME: performance + if self.user_provided { + let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); + new_docids &= user_provided_docsids; + } + + docids |= new_docids; } if let Some(universe) = universe {