Refactor to support fewer combinations

Mubelotix
2025-07-23 15:33:17 +02:00
parent 776e55d209
commit aa5a1f333a
2 changed files with 109 additions and 100 deletions

View File

@@ -981,7 +981,7 @@ async fn vector_filter_specific_fragment_user_provided() {
         .await;
     snapshot!(value, @r#"
     {
-      "message": "Index `[uuid]`: Vector filter cannot have both `other` and `userProvided`.\n31:43 _vectors.rest.fragments.other.userProvided EXISTS",
+      "message": "Index `[uuid]`: Vector filter cannot have both `fragments` and `userProvided`.\n15:24 _vectors.rest.fragments.other.userProvided EXISTS",
       "code": "invalid_search_filter",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@@ -1156,10 +1156,9 @@ async fn vector_filter_or_combination() {
 async fn vector_filter_regenerate() {
     let index = shared_index_for_fragments().await;

-    for selector in ["_vectors.rest.regenerate", "_vectors.rest.fragments.basic.regenerate"] {
     let (value, _code) = index
         .search_post(json!({
-            "filter": format!("{selector} EXISTS"),
+            "filter": format!("_vectors.rest.regenerate EXISTS"),
             "attributesToRetrieve": ["name"]
         }))
         .await;
@@ -1183,5 +1182,19 @@ async fn vector_filter_regenerate() {
       "estimatedTotalHits": 3
     }
     "#);
-    }
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": format!("_vectors.rest.fragments.basic.regenerate EXISTS"),
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "Index `[uuid]`: Vector filter cannot have both `fragments` and `regenerate`.\n15:24 _vectors.rest.fragments.basic.regenerate EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
 }
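For reference, a minimal standalone sketch of the behaviour the updated tests pin down: `regenerate` is accepted directly under an embedder, while combining it with a `fragments` selector is now rejected. This is not part of the commit; it reuses the helpers visible above (`shared_index_for_fragments`, `search_post`, `json!`) and assumes the `#[actix_rt::test]` attribute used elsewhere in this test suite. The status-code assertions are inferred from the snapshots, not taken from the diff.

#[actix_rt::test]
async fn vector_filter_regenerate_suffix_is_exclusive() {
    let index = shared_index_for_fragments().await;

    // `regenerate` directly under the embedder still returns results.
    let (value, code) = index
        .search_post(json!({
            "filter": "_vectors.rest.regenerate EXISTS",
            "attributesToRetrieve": ["name"]
        }))
        .await;
    assert_eq!(code, 200, "{value}");

    // `fragments.{name}` combined with `regenerate` is rejected with
    // `invalid_search_filter`, as snapshotted above.
    let (value, code) = index
        .search_post(json!({
            "filter": "_vectors.rest.fragments.basic.regenerate EXISTS",
            "attributesToRetrieve": ["name"]
        }))
        .await;
    assert_eq!(code, 400, "{value}");
    assert_eq!(value["code"], "invalid_search_filter");
}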

View File

@@ -8,16 +8,17 @@ use crate::Index;
 #[derive(Debug)]
 enum VectorFilterInner<'a> {
-    Fragment { embedder_token: Token<'a>, fragment_token: Token<'a> },
-    DocumentTemplate { embedder_token: Token<'a> },
-    UserProvided { embedder_token: Token<'a> },
-    FullEmbedder { embedder_token: Token<'a> },
+    Fragment(Token<'a>),
+    DocumentTemplate,
+    UserProvided,
+    Regenerate,
+    None,
 }

 #[derive(Debug)]
 pub(super) struct VectorFilter<'a> {
-    inner: Option<VectorFilterInner<'a>>,
-    regenerate: bool,
+    embedder: Option<Token<'a>>,
+    inner: VectorFilterInner<'a>,
 }

 #[derive(Debug, thiserror::Error)]
@@ -31,8 +32,10 @@ pub enum VectorFilterError<'a> {
     #[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
     MissingFragmentName(Token<'a>),

-    #[error("Vector filter cannot have both `{}` and `{}`.", _0.0.value(), _0.1.value())]
-    ExclusiveOptions(Box<(Token<'a>, Token<'a>)>),
+    #[error("Vector filter cannot have both {}.", {
+        _0.iter().map(|t| format!("`{}`", t.value())).collect::<Vec<_>>().join(" and ")
+    })]
+    ExclusiveOptions(Vec<Token<'a>>),

     #[error("Vector filter has leftover token: `{}`.", _0.value())]
     LeftoverToken(Token<'a>),
@@ -73,7 +76,12 @@ impl<'a> From<VectorFilterError<'a>> for Error {
             InvalidPrefix(token) | MissingFragmentName(token) | LeftoverToken(token) => {
                 token.clone().as_external_error(err).into()
             }
-            ExclusiveOptions(tokens) => tokens.1.clone().as_external_error(err).into(),
+            ExclusiveOptions(tokens) => tokens
+                .first()
+                .cloned()
+                .unwrap_or_else(|| Token::from("")) // Should never happen: tokens is never created empty
+                .as_external_error(err)
+                .into(),
             EmbedderDoesNotExist { embedder: token, .. }
             | FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
         }
@@ -92,11 +100,8 @@ impl<'a> VectorFilter<'a> {
     /// - `_vectors.{embedder_name}`
     /// - `_vectors.{embedder_name}.regenerate`
     /// - `_vectors.{embedder_name}.userProvided`
-    /// - `_vectors.{embedder_name}.userProvided.regenerate`
     /// - `_vectors.{embedder_name}.documentTemplate`
-    /// - `_vectors.{embedder_name}.documentTemplate.regenerate`
     /// - `_vectors.{embedder_name}.fragments.{fragment_name}`
-    /// - `_vectors.{embedder_name}.fragments.{fragment_name}.regenerate`
     pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
         let mut split = s.split(".").peekable();
@@ -108,54 +113,53 @@ impl<'a> VectorFilter<'a> {
         let embedder_name = split.next();

-        let mut fragment_name = None;
+        let mut fragment_tokens = None;
         if split.peek().map(|t| t.value()) == Some("fragments") {
             let token = split.next().expect("it was peeked before");
-            fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
+            let name = split.next().ok_or_else(|| MissingFragmentName(token.clone()))?;
+            fragment_tokens = Some((token, name));
         }

+        let mut remaining_tokens = split.collect::<Vec<_>>();
+
         let mut user_provided_token = None;
-        if split.peek().map(|t| t.value()) == Some("userProvided") {
-            user_provided_token = split.next();
+        if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "userProvided") {
+            user_provided_token = Some(remaining_tokens.remove(position));
         }

         let mut document_template_token = None;
-        if split.peek().map(|t| t.value()) == Some("documentTemplate") {
-            document_template_token = split.next();
+        if let Some(position) =
+            remaining_tokens.iter().position(|t| t.value() == "documentTemplate")
+        {
+            document_template_token = Some(remaining_tokens.remove(position));
         }

         let mut regenerate_token = None;
-        if split.peek().map(|t| t.value()) == Some("regenerate") {
-            regenerate_token = split.next();
+        if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "regenerate") {
+            regenerate_token = Some(remaining_tokens.remove(position));
        }

-        let inner = match (fragment_name, user_provided_token, document_template_token) {
-            (Some(fragment_name), None, None) => Some(VectorFilterInner::Fragment {
-                embedder_token: embedder_name
-                    .expect("embedder name comes before fragment so it's always Some"),
-                fragment_token: fragment_name,
-            }),
-            (None, Some(_), None) => Some(VectorFilterInner::UserProvided {
-                embedder_token: embedder_name
-                    .expect("embedder name comes before userProvided so it's always Some"),
-            }),
-            (None, None, Some(_)) => Some(VectorFilterInner::DocumentTemplate {
-                embedder_token: embedder_name
-                    .expect("embedder name comes before documentTemplate so it's always Some"),
-            }),
-            (Some(a), Some(b), _) | (_, Some(a), Some(b)) | (Some(a), None, Some(b)) => {
-                return Err(ExclusiveOptions(Box::new((a, b))));
-            }
-            (None, None, None) => embedder_name
-                .map(|embedder_token| VectorFilterInner::FullEmbedder { embedder_token }),
-        };
-
-        if let Some(next) = split.next() {
-            return Err(LeftoverToken(next))?;
-        }
-
-        Ok(Self { inner, regenerate: regenerate_token.is_some() })
+        if !remaining_tokens.is_empty() {
+            return Err(LeftoverToken(remaining_tokens.remove(0)));
+        }
+
+        let inner =
+            match (fragment_tokens, user_provided_token, document_template_token, regenerate_token)
+            {
+                (Some((_token, name)), None, None, None) => VectorFilterInner::Fragment(name),
+                (None, Some(_), None, None) => VectorFilterInner::UserProvided,
+                (None, None, Some(_), None) => VectorFilterInner::DocumentTemplate,
+                (None, None, None, Some(_)) => VectorFilterInner::Regenerate,
+                (None, None, None, None) => VectorFilterInner::None,
+                (a, b, c, d) => {
+                    let a = a.map(|(token, _)| token);
+                    let present = [a, b, c, d].into_iter().flatten().collect();
+                    return Err(ExclusiveOptions(present));
+                }
+            };
+
+        Ok(Self { inner, embedder: embedder_name })
     }

     pub(super) fn evaluate(
@@ -167,19 +171,16 @@ impl<'a> VectorFilter<'a> {
         let index_embedding_configs = index.embedding_configs();
         let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;

-        let inners = match self.inner {
-            Some(inner) => vec![inner],
-            None => embedding_configs
-                .iter()
-                .map(|config| VectorFilterInner::FullEmbedder {
-                    embedder_token: Token::from(config.name.as_str()),
-                })
-                .collect(),
+        let embedders = match self.embedder {
+            Some(embedder) => vec![embedder],
+            None => {
+                embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect()
+            }
         };

-        let mut docids = inners
+        let mut docids = embedders
             .iter()
-            .map(|i| i.evaluate_inner(rtxn, index, &embedding_configs, self.regenerate))
+            .map(|e| self.inner.evaluate(rtxn, index, e, &embedding_configs))
             .union()?;

         if let Some(universe) = universe {
@@ -191,19 +192,13 @@ impl<'a> VectorFilter<'a> {
 }

 impl VectorFilterInner<'_> {
-    fn evaluate_inner(
+    fn evaluate(
         &self,
         rtxn: &heed::RoTxn<'_>,
         index: &Index,
+        embedder: &Token<'_>,
         embedding_configs: &[IndexEmbeddingConfig],
-        regenerate: bool,
     ) -> crate::Result<RoaringBitmap> {
-        let embedder = match self {
-            VectorFilterInner::Fragment { embedder_token, .. } => embedder_token,
-            VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token,
-            VectorFilterInner::UserProvided { embedder_token } => embedder_token,
-            VectorFilterInner::FullEmbedder { embedder_token } => embedder_token,
-        };
         let embedder_name = embedder.value();
         let available_embedders =
             || embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
@@ -224,8 +219,8 @@ impl VectorFilterInner<'_> {
             embedding_config.config.quantized(),
         );

-        let mut docids = match self {
-            VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => {
+        let docids = match self {
+            VectorFilterInner::Fragment(fragment) => {
                 let fragment_name = fragment.value();
                 let fragment_config = embedding_config
                     .fragments
@@ -245,7 +240,7 @@ impl VectorFilterInner<'_> {
                 arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
             }
-            VectorFilterInner::DocumentTemplate { .. } => {
+            VectorFilterInner::DocumentTemplate => {
                 if !embedding_config.fragments.as_slice().is_empty() {
                     return Ok(RoaringBitmap::new());
                 }
@@ -255,22 +250,23 @@ impl VectorFilterInner<'_> {
                 arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
                 stats.documents - user_provided_docsids.clone()
             }
-            VectorFilterInner::UserProvided { .. } => {
+            VectorFilterInner::UserProvided => {
                 let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
                 user_provided_docsids.clone()
             }
-            VectorFilterInner::FullEmbedder { .. } => {
+            VectorFilterInner::Regenerate => {
+                let mut stats = ArroyStats::default();
+                arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
+                let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
+                stats.documents - skip_regenerate
+            }
+            VectorFilterInner::None => {
                 let mut stats = ArroyStats::default();
                 arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
                 stats.documents
             }
         };

-        if regenerate {
-            let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
-            docids -= skip_regenerate;
-        }
-
         Ok(docids)
     }
 }
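For reference, a module-internal sketch (not part of the commit) of how the refactored parser behaves: after the embedder name, exactly one of `fragments.{name}`, `userProvided`, `documentTemplate` or `regenerate` may appear; any combination falls into the catch-all arm and becomes `ExclusiveOptions`, and no suffix at all yields `VectorFilterInner::None`. The test name and placement are illustrative; it assumes the test module sits next to the code above so the private fields and `pub(super)` items are in scope.

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parse_rejects_combined_suffixes() {
        // One recognised suffix: parsed into the corresponding inner variant.
        let ok = Token::from("_vectors.rest.regenerate");
        let filter = VectorFilter::parse(&ok).unwrap();
        assert!(matches!(filter.inner, VectorFilterInner::Regenerate));

        // No suffix at all: `VectorFilterInner::None`, embedder kept aside.
        let bare = Token::from("_vectors.rest");
        let filter = VectorFilter::parse(&bare).unwrap();
        assert!(matches!(filter.inner, VectorFilterInner::None));

        // Two suffixes (`fragments.basic` plus `regenerate`): the catch-all arm
        // collects every present token and returns `ExclusiveOptions`.
        let bad = Token::from("_vectors.rest.fragments.basic.regenerate");
        match VectorFilter::parse(&bad) {
            Err(VectorFilterError::ExclusiveOptions(tokens)) => assert_eq!(tokens.len(), 2),
            other => panic!("expected ExclusiveOptions, got {other:?}"),
        }
    }
}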