diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index 1a27bdf99..12bfbe2ea 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -1152,19 +1152,18 @@ async fn vector_filter_or_combination() { "#); } - #[actix_rt::test] async fn vector_filter_regenerate() { let index = shared_index_for_fragments().await; for selector in ["_vectors.rest.regenerate", "_vectors.rest.fragments.basic.regenerate"] { let (value, _code) = index - .search_post(json!({ - "filter": format!("{selector} EXISTS"), - "attributesToRetrieve": ["name"] - })) - .await; - snapshot!(value, @r#" + .search_post(json!({ + "filter": format!("{selector} EXISTS"), + "attributesToRetrieve": ["name"] + })) + .await; + snapshot!(value, @r#" { "hits": [ { @@ -1186,4 +1185,3 @@ async fn vector_filter_regenerate() { "#); } } - diff --git a/crates/milli/src/search/facet/filter_vector.rs b/crates/milli/src/search/facet/filter_vector.rs index 7fbd9c916..a59bbb5f9 100644 --- a/crates/milli/src/search/facet/filter_vector.rs +++ b/crates/milli/src/search/facet/filter_vector.rs @@ -14,108 +14,49 @@ enum VectorFilterInner<'a> { FullEmbedder { embedder_token: Token<'a> }, } -impl VectorFilterInner<'_> { - fn evaluate_inner( - &self, - rtxn: &heed::RoTxn<'_>, - index: &Index, - embedding_configs: &[IndexEmbeddingConfig], - regenerate: bool, - ) -> crate::Result<RoaringBitmap> { - let embedder = match self { - VectorFilterInner::Fragment { embedder_token, .. 
} => embedder_token, - VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token, - VectorFilterInner::UserProvided { embedder_token } => embedder_token, - VectorFilterInner::FullEmbedder { embedder_token } => embedder_token, - }; - let embedder_name = embedder.value(); - let available_embedders = - || embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>(); - - let embedding_config = embedding_configs - .iter() - .find(|config| config.name == embedder_name) - .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; - - let embedder_info = index - .embedding_configs() - .embedder_info(rtxn, embedder_name)? - .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; - - let arroy_wrapper = ArroyWrapper::new( - index.vector_arroy, - embedder_info.embedder_id, - embedding_config.config.quantized(), - ); - - let mut docids = match self { - VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => { - let fragment_name = fragment.value(); - let fragment_config = embedding_config - .fragments - .as_slice() - .iter() - .find(|fragment| fragment.name == fragment_name) - .ok_or_else(|| FragmentDoesNotExist { - embedder, - fragment, - available: embedding_config - .fragments - .as_slice() - .iter() - .map(|f| f.name.clone()) - .collect(), - })?; - - arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? - } - VectorFilterInner::DocumentTemplate { .. } => { - if !embedding_config.fragments.as_slice().is_empty() { - return Ok(RoaringBitmap::new()); - } - - let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; - stats.documents - user_provided_docsids.clone() - } - VectorFilterInner::UserProvided { .. 
} => { - let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); - user_provided_docsids.clone() - } - VectorFilterInner::FullEmbedder { .. } => { - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; - stats.documents - } - }; - - if regenerate { - let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids(); - docids -= skip_regenerate; - } - - Ok(docids) - } -} - #[derive(Debug)] pub(super) struct VectorFilter<'a> { inner: Option<VectorFilterInner<'a>>, regenerate: bool, } -#[derive(Debug)] +#[derive(Debug, thiserror::Error)] pub enum VectorFilterError<'a> { + #[error("Vector filter cannot be empty.")] EmptyFilter, + + #[error("Vector filter must start with `_vectors` but found `{}`.", _0.value())] InvalidPrefix(Token<'a>), + + #[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")] MissingFragmentName(Token<'a>), + + #[error("Vector filter cannot have both `{}` and `{}`.", _0.0.value(), _0.1.value())] ExclusiveOptions(Box<(Token<'a>, Token<'a>)>), + + #[error("Vector filter has leftover token: `{}`.", _0.value())] LeftoverToken(Token<'a>), - EmbedderDoesNotExist { - embedder: &'a Token<'a>, - available: Vec<String>, - }, + + #[error("The embedder `{}` does not exist. {}", embedder.value(), { + if available.is_empty() { + String::from("This index does not have any configured embedders.") + } else { + let mut available = available.clone(); + available.sort_unstable(); + format!("Available embedders are: {}.", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", ")) + } + })] + EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> }, + + #[error("The fragment `{}` does not exist on embedder `{}`. 
{}", fragment.value(), embedder.value(), { + if available.is_empty() { + String::from("This embedder does not have any configured fragments.") + } else { + let mut available = available.clone(); + available.sort_unstable(); + format!("Available fragments on this embedder are: {}.", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", ")) + } + })] FragmentDoesNotExist { embedder: &'a Token<'a>, fragment: &'a Token<'a>, @@ -125,78 +66,6 @@ pub enum VectorFilterError<'a> { use VectorFilterError::*; -impl std::error::Error for VectorFilterError<'_> {} - -impl std::fmt::Display for VectorFilterError<'_> { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - EmptyFilter => { - write!(f, "Vector filter cannot be empty.") - } - InvalidPrefix(prefix) => { - write!( - f, - "Vector filter must start with `_vectors` but found `{}`.", - prefix.value() - ) - } - MissingFragmentName(_token) => { - write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.") - } - ExclusiveOptions(tokens) => { - write!( - f, - "Vector filter cannot have both `{}` and `{}`.", - tokens.0.value(), - tokens.1.value() - ) - } - LeftoverToken(token) => { - write!(f, "Vector filter has leftover token: `{}`.", token.value()) - } - EmbedderDoesNotExist { embedder, available } => { - write!(f, "The embedder `{}` does not exist.", embedder.value())?; - if available.is_empty() { - write!(f, " This index does not have configured embedders.") - } else { - write!(f, " Available embedders are: ")?; - let mut available = available.clone(); - available.sort_unstable(); - for (idx, embedder) in available.iter().enumerate() { - write!(f, "`{embedder}`")?; - if idx != available.len() - 1 { - write!(f, ", ")?; - } - } - write!(f, ".") - } - } - FragmentDoesNotExist { embedder, fragment, available } => { - write!( - f, - "The fragment `{}` does not exist on embedder `{}`.", - fragment.value(), - embedder.value(), - )?; - if 
available.is_empty() { - write!(f, " This embedder does not have configured fragments.") - } else { - write!(f, " Available fragments on this embedder are: ")?; - let mut available = available.clone(); - available.sort_unstable(); - for (idx, fragment) in available.iter().enumerate() { - write!(f, "`{fragment}`")?; - if idx != available.len() - 1 { - write!(f, ", ")?; - } - } - write!(f, ".") - } - } - } - } -} - impl<'a> From<VectorFilterError<'a>> for Error { fn from(err: VectorFilterError<'a>) -> Self { match &err { @@ -320,3 +189,88 @@ impl<'a> VectorFilter<'a> { Ok(docids) } } + +impl VectorFilterInner<'_> { + fn evaluate_inner( + &self, + rtxn: &heed::RoTxn<'_>, + index: &Index, + embedding_configs: &[IndexEmbeddingConfig], + regenerate: bool, + ) -> crate::Result<RoaringBitmap> { + let embedder = match self { + VectorFilterInner::Fragment { embedder_token, .. } => embedder_token, + VectorFilterInner::DocumentTemplate { embedder_token } => embedder_token, + VectorFilterInner::UserProvided { embedder_token } => embedder_token, + VectorFilterInner::FullEmbedder { embedder_token } => embedder_token, + }; + let embedder_name = embedder.value(); + let available_embedders = + || embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>(); + + let embedding_config = embedding_configs + .iter() + .find(|config| config.name == embedder_name) + .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; + + let embedder_info = index + .embedding_configs() + .embedder_info(rtxn, embedder_name)? 
+ .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; + + let arroy_wrapper = ArroyWrapper::new( + index.vector_arroy, + embedder_info.embedder_id, + embedding_config.config.quantized(), + ); + + let mut docids = match self { + VectorFilterInner::Fragment { embedder_token: embedder, fragment_token: fragment } => { + let fragment_name = fragment.value(); + let fragment_config = embedding_config + .fragments + .as_slice() + .iter() + .find(|fragment| fragment.name == fragment_name) + .ok_or_else(|| FragmentDoesNotExist { + embedder, + fragment, + available: embedding_config + .fragments + .as_slice() + .iter() + .map(|f| f.name.clone()) + .collect(), + })?; + + arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? + } + VectorFilterInner::DocumentTemplate { .. } => { + if !embedding_config.fragments.as_slice().is_empty() { + return Ok(RoaringBitmap::new()); + } + + let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); + let mut stats = ArroyStats::default(); + arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + stats.documents - user_provided_docsids.clone() + } + VectorFilterInner::UserProvided { .. } => { + let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); + user_provided_docsids.clone() + } + VectorFilterInner::FullEmbedder { .. } => { + let mut stats = ArroyStats::default(); + arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + stats.documents + } + }; + + if regenerate { + let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids(); + docids -= skip_regenerate; + } + + Ok(docids) + } +}