From bb4d57386280796928f311133eb14d1a4af470a4 Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Thu, 24 Jul 2025 14:56:35 +0200 Subject: [PATCH] Switch to a nom parser --- crates/filter-parser/src/condition.rs | 60 ++++ crates/filter-parser/src/error.rs | 34 ++ crates/filter-parser/src/lib.rs | 46 ++- crates/filter-parser/src/value.rs | 33 ++ crates/meilisearch/tests/search/filters.rs | 18 +- crates/milli/src/search/facet/filter.rs | 23 +- .../milli/src/search/facet/filter_vector.rs | 306 ++++++------------ 7 files changed, 269 insertions(+), 251 deletions(-) diff --git a/crates/filter-parser/src/condition.rs b/crates/filter-parser/src/condition.rs index 0fc007bf1..af0767706 100644 --- a/crates/filter-parser/src/condition.rs +++ b/crates/filter-parser/src/condition.rs @@ -7,11 +7,20 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::character::complete::char; +use nom::character::complete::multispace0; use nom::character::complete::multispace1; use nom::combinator::cut; +use nom::combinator::map; +use nom::combinator::value; +use nom::sequence::preceded; use nom::sequence::{terminated, tuple}; use Condition::*; +use crate::error::IResultExt; +use crate::value::parse_vector_value; +use crate::ErrorKind; +use crate::VectorFilter; use crate::{parse_value, FilterCondition, IResult, Span, Token}; #[derive(Debug, Clone, PartialEq, Eq)] @@ -113,6 +122,57 @@ pub fn parse_not_exists(input: Span) -> IResult { Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists })))) } +fn parse_vectors(input: Span) -> IResult<(Token, Option, VectorFilter<'_>)> { + let (input, _) = multispace0(input)?; + let (input, fid) = tag("_vectors")(input)?; + + if let Ok((input, _)) = multispace1::<_, crate::Error>(input) { + return Ok((input, (Token::from(fid), None, VectorFilter::None))); + } + + let (input, _) = char('.')(input)?; + + // From this point, we are certain this is a vector filter, so our errors must be final. 
+ // We could use nom's `cut` but it's better to be explicit about the errors + + let (input, embedder_name) = parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidEmbedder)?; + + let (input, filter) = alt(( + map( + preceded(tag(".fragments"), |input| { + let (input, _) = tag(".")(input).map_cut(ErrorKind::VectorFilterMissingFragment)?; + parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidFragment) + }), + VectorFilter::Fragment, + ), + value(VectorFilter::UserProvided, tag(".userProvided")), + value(VectorFilter::DocumentTemplate, tag(".documentTemplate")), + value(VectorFilter::Regenerate, tag(".regenerate")), + value(VectorFilter::None, nom::combinator::success("")), + ))(input)?; + + let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?; + + Ok((input, (Token::from(fid), Some(embedder_name), filter))) +} + +/// vectors_exists = vectors "EXISTS" +pub fn parse_vectors_exists(input: Span) -> IResult { + let (input, (fid, embedder, filter)) = terminated(parse_vectors, tag("EXISTS"))(input)?; + + Ok((input, FilterCondition::VectorExists { fid, embedder, filter })) +} +/// vectors_not_exists = vectors "NOT" WS+ "EXISTS" +pub fn parse_vectors_not_exists(input: Span) -> IResult { + let (input, (fid, embedder, filter)) = parse_vectors(input)?; + + let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?; + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::VectorExists { fid, embedder, filter })), + )) +} + /// contains = value "CONTAINS" value pub fn parse_contains(input: Span) -> IResult { let (input, (fid, contains, value)) = diff --git a/crates/filter-parser/src/error.rs b/crates/filter-parser/src/error.rs index 855ce983e..cf2419b01 100644 --- a/crates/filter-parser/src/error.rs +++ b/crates/filter-parser/src/error.rs @@ -42,6 +42,23 @@ pub fn cut_with_err<'a, O>( } } +pub trait IResultExt<'a> { + fn map_cut(self, kind: ErrorKind<'a>) -> Self; +} + +impl<'a, T> IResultExt<'a> for 
IResult<'a, T> { + fn map_cut(self, kind: ErrorKind<'a>) -> Self { + self.map_err(move |e: nom::Err>| { + let input = match e { + nom::Err::Incomplete(_) => return e, + nom::Err::Error(e) => *e.context(), + nom::Err::Failure(e) => *e.context(), + }; + nom::Err::Failure(Error::new_from_kind(input, kind)) + }) + } +} + #[derive(Debug)] pub struct Error<'a> { context: Span<'a>, @@ -76,6 +93,11 @@ pub enum ErrorKind<'a> { InternalError(error::ErrorKind), DepthLimitReached, External(String), + + VectorFilterLeftover, + VectorFilterInvalidEmbedder, + VectorFilterMissingFragment, + VectorFilterInvalidFragment, } impl<'a> Error<'a> { @@ -169,6 +191,18 @@ impl Display for Error<'_> { ErrorKind::MisusedGeoBoundingBox => { writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")? } + ErrorKind::VectorFilterLeftover => { + writeln!(f, "The vector filter has leftover tokens.")? + } + ErrorKind::VectorFilterInvalidFragment => { + writeln!(f, "The vector filter's fragment is invalid.")? + } + ErrorKind::VectorFilterMissingFragment => { + writeln!(f, "The vector filter is missing a fragment name.")? + } + ErrorKind::VectorFilterInvalidEmbedder => { + writeln!(f, "The vector filter's embedder is invalid.")? + } ErrorKind::ReservedKeyword(word) => { writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")? 
} diff --git a/crates/filter-parser/src/lib.rs b/crates/filter-parser/src/lib.rs index 1590b08fd..b5697f914 100644 --- a/crates/filter-parser/src/lib.rs +++ b/crates/filter-parser/src/lib.rs @@ -65,6 +65,9 @@ use nom_locate::LocatedSpan; pub(crate) use value::parse_value; use value::word_exact; +use crate::condition::{parse_vectors_exists, parse_vectors_not_exists}; +use crate::error::IResultExt; + pub type Span<'a> = LocatedSpan<&'a str, &'a str>; type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; @@ -146,6 +149,15 @@ impl<'a> From<&'a str> for Token<'a> { } } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum VectorFilter<'a> { + Fragment(Token<'a>), + DocumentTemplate, + UserProvided, + Regenerate, + None, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum FilterCondition<'a> { Not(Box), @@ -153,6 +165,7 @@ pub enum FilterCondition<'a> { In { fid: Token<'a>, els: Vec> }, Or(Vec), And(Vec), + VectorExists { fid: Token<'a>, embedder: Option>, filter: VectorFilter<'a> }, GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] }, } @@ -183,7 +196,8 @@ impl<'a> FilterCondition<'a> { FilterCondition::Or(seq) | FilterCondition::And(seq) => { seq.iter().find_map(|filter| filter.use_contains_operator()) } - FilterCondition::GeoLowerThan { .. } + FilterCondition::VectorExists { .. } + | FilterCondition::GeoLowerThan { .. } | FilterCondition::GeoBoundingBox { .. } | FilterCondition::In { .. } => None, } @@ -191,13 +205,7 @@ impl<'a> FilterCondition<'a> { pub fn use_vector_filter(&self) -> Option<&Token> { match self { - FilterCondition::Condition { fid, op: _ } => { - if fid.value().starts_with("_vectors.") || fid.value() == "_vectors" { - Some(fid) - } else { - None - } - } + FilterCondition::Condition { .. 
} => None, FilterCondition::Not(this) => this.use_vector_filter(), FilterCondition::Or(seq) | FilterCondition::And(seq) => { seq.iter().find_map(|filter| filter.use_vector_filter()) @@ -205,6 +213,7 @@ impl<'a> FilterCondition<'a> { FilterCondition::GeoLowerThan { .. } | FilterCondition::GeoBoundingBox { .. } | FilterCondition::In { .. } => None, + FilterCondition::VectorExists { fid, .. } => Some(fid), } } @@ -292,10 +301,7 @@ fn parse_in_body(input: Span) -> IResult> { let (input, _) = ws(word_exact("IN"))(input)?; // everything after `IN` can be a failure - let (input, _) = - cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))( - input, - )?; + let (input, _) = tag("[")(input).map_cut(ErrorKind::InOpeningBracket)?; let (input, content) = cut(parse_value_list)(input)?; @@ -529,8 +535,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_is_not_null, parse_is_empty, parse_is_not_empty, - parse_exists, - parse_not_exists, + alt((parse_vectors_exists, parse_vectors_not_exists, parse_exists, parse_not_exists)), parse_to, parse_contains, parse_not_contains, @@ -586,6 +591,19 @@ impl std::fmt::Display for FilterCondition<'_> { } write!(f, "]") } + FilterCondition::VectorExists { fid: _, embedder, filter: inner } => { + write!(f, "_vectors")?; + if let Some(embedder) = embedder { + write!(f, ".{embedder:?}")?; + } + match inner { + VectorFilter::Fragment(fragment) => write!(f, ".fragments.{fragment:?}"), + VectorFilter::DocumentTemplate => write!(f, ".documentTemplate"), + VectorFilter::UserProvided => write!(f, ".userProvided"), + VectorFilter::Regenerate => write!(f, ".regenerate"), + VectorFilter::None => Ok(()), + } + } FilterCondition::GeoLowerThan { point, radius } => { write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius) } diff --git a/crates/filter-parser/src/value.rs b/crates/filter-parser/src/value.rs index 98cac39fe..345f0b0a2 100644 --- a/crates/filter-parser/src/value.rs +++ 
b/crates/filter-parser/src/value.rs @@ -80,6 +80,39 @@ pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, } } +/// vector_value = ( non_dot_word | singleQuoted | doubleQuoted) +pub fn parse_vector_value(input: Span) -> IResult { + pub fn non_dot_word(input: Span) -> IResult { + let (input, word) = take_while1(|c| is_value_component(c) && c != '.')(input)?; + Ok((input, word.into())) + } + + let (input, value) = alt(( + delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), + delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), + non_dot_word, + ))(input)?; + + match unescaper::unescape(value.value()) { + Ok(content) => { + if content.len() != value.value().len() { + Ok((input, Token::new(value.original_span(), Some(content)))) + } else { + Ok((input, value)) + } + } + Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)), + Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind( + value.original_span(), + ErrorKind::InvalidEscapedNumber, + ))), + Err(unescaper::Error::InvalidChar { .. 
}) => Err(nom::Err::Error(Error::new_from_kind( + value.original_span(), + ErrorKind::MalformedValue, + ))), + } +} + /// value = WS* ( word | singleQuoted | doubleQuoted) WS+ pub fn parse_value(input: Span) -> IResult { // to get better diagnostic message we are going to strip the left whitespaces from the input right now diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index cd2da747d..67f9ebb71 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -779,7 +779,7 @@ async fn vector_filter_missing_fragment() { .await; snapshot!(value, @r#" { - "message": "Index `[uuid]`: Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.\n15:24 _vectors.rest.fragments EXISTS", + "message": "The vector filter is missing a fragment name.\n24:31 _vectors.rest.fragments EXISTS", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -981,7 +981,7 @@ async fn vector_filter_specific_fragment_user_provided() { .await; snapshot!(value, @r#" { - "message": "Index `[uuid]`: Vector filter cannot have both `fragments` and `userProvided`.\n15:24 _vectors.rest.fragments.other.userProvided EXISTS", + "message": "The vector filter has leftover tokens.\n30:50 _vectors.rest.fragments.other.userProvided EXISTS", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1190,11 +1190,11 @@ async fn vector_filter_regenerate() { })) .await; snapshot!(value, @r#" - { - "message": "Index `[uuid]`: Vector filter cannot have both `fragments` and `regenerate`.\n15:24 _vectors.rest.fragments.basic.regenerate EXISTS", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - } - "#); + { + "message": "The vector filter has leftover 
tokens.\n30:48 _vectors.rest.fragments.basic.regenerate EXISTS", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "#); } diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 21a552965..4e67814d3 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -10,8 +10,8 @@ use memchr::memmem::Finder; use roaring::{MultiOps, RoaringBitmap}; use serde_json::Value; -use super::{facet_range_search, filter_vector::VectorFilter}; -use crate::constants::RESERVED_GEO_FIELD_NAME; +use super::facet_range_search; +use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::error::{Error, UserError}; use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::heed_codec::facet::{ @@ -230,7 +230,7 @@ impl<'a> Filter<'a> { } pub fn use_vector_filter(&self) -> Option<&Token> { - self.condition.use_vector_filter() + self.condition.use_vector_filter() } } @@ -241,10 +241,10 @@ impl<'a> Filter<'a> { let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { - let attribute = fid.value(); + let attribute = fid.value(); if matching_features(attribute, &filterable_attributes_rules) .is_some_and(|(_, features)| features.is_filterable()) - || VectorFilter::matches(attribute) + || attribute == RESERVED_VECTORS_FIELD_NAME { continue; } @@ -549,16 +549,6 @@ impl<'a> Filter<'a> { } FilterCondition::Condition { fid, op } => { let value = fid.value(); - if VectorFilter::matches(value) { - if !matches!(op, Condition::Exists) { - return Err(Error::UserError(UserError::InvalidFilter(String::from( - "Vector filter can only be used with the `exists` operator", - )))); - } - let vector_filter = VectorFilter::parse(fid)?; - return vector_filter.evaluate(rtxn, index, universe); - } 
- let Some(field_id) = field_ids_map.id(value) else { return Ok(RoaringBitmap::new()); }; @@ -616,6 +606,9 @@ impl<'a> Filter<'a> { Ok(RoaringBitmap::new()) } } + FilterCondition::VectorExists { fid: _, embedder, filter } => { + super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter) + } FilterCondition::GeoLowerThan { point, radius } => { if index.is_geo_filtering_enabled(rtxn)? { let base_point: [f64; 2] = diff --git a/crates/milli/src/search/facet/filter_vector.rs b/crates/milli/src/search/facet/filter_vector.rs index 91f138685..2ddd801ed 100644 --- a/crates/milli/src/search/facet/filter_vector.rs +++ b/crates/milli/src/search/facet/filter_vector.rs @@ -1,45 +1,13 @@ -use filter_parser::Token; +use filter_parser::{Token, VectorFilter}; use roaring::{MultiOps, RoaringBitmap}; -use crate::error::{Error, UserError}; +use crate::error::Error; use crate::vector::db::IndexEmbeddingConfig; use crate::vector::{ArroyStats, ArroyWrapper}; use crate::Index; -#[derive(Debug)] -enum VectorFilterInner<'a> { - Fragment(Token<'a>), - DocumentTemplate, - UserProvided, - Regenerate, - None, -} - -#[derive(Debug)] -pub(super) struct VectorFilter<'a> { - embedder: Option>, - inner: VectorFilterInner<'a>, -} - #[derive(Debug, thiserror::Error)] pub enum VectorFilterError<'a> { - #[error("Vector filter cannot be empty.")] - EmptyFilter, - - #[error("Vector filter must start with `_vectors` but found `{}`.", _0.value())] - InvalidPrefix(Token<'a>), - - #[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")] - MissingFragmentName(Token<'a>), - - #[error("Vector filter cannot have both {}.", { - _0.iter().map(|t| format!("`{}`", t.value())).collect::>().join(" and ") - })] - ExclusiveOptions(Vec>), - - #[error("Vector filter has leftover token: `{}`.", _0.value())] - LeftoverToken(Token<'a>), - #[error("The embedder `{}` does not exist. 
{}", embedder.value(), { if available.is_empty() { String::from("This index does not have any configured embedders.") @@ -72,201 +40,113 @@ use VectorFilterError::*; impl<'a> From> for Error { fn from(err: VectorFilterError<'a>) -> Self { match &err { - EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())), - InvalidPrefix(token) | MissingFragmentName(token) | LeftoverToken(token) => { - token.clone().as_external_error(err).into() - } - ExclusiveOptions(tokens) => tokens - .first() - .cloned() - .unwrap_or_else(|| Token::from("")) // Should never happen: tokens is never created empty - .as_external_error(err) - .into(), EmbedderDoesNotExist { embedder: token, .. } | FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(), } } } -impl<'a> VectorFilter<'a> { - pub(super) fn matches(value: &str) -> bool { - value.starts_with("_vectors.") || value == "_vectors" +pub(super) fn evaluate( + rtxn: &heed::RoTxn<'_>, + index: &Index, + universe: Option<&RoaringBitmap>, + embedder: Option>, + filter: &VectorFilter<'_>, +) -> crate::Result { + let index_embedding_configs = index.embedding_configs(); + let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?; + + let embedders = match embedder { + Some(embedder) => vec![embedder], + None => embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect(), + }; + + let mut docids = embedders + .iter() + .map(|e| evaluate_inner(rtxn, index, e, &embedding_configs, filter)) + .union()?; + + if let Some(universe) = universe { + docids &= universe; } - /// Parses a vector filter string. 
- /// - /// Valid formats: - /// - `_vectors` - /// - `_vectors.{embedder_name}` - /// - `_vectors.{embedder_name}.regenerate` - /// - `_vectors.{embedder_name}.userProvided` - /// - `_vectors.{embedder_name}.documentTemplate` - /// - `_vectors.{embedder_name}.fragments.{fragment_name}` - pub(super) fn parse(s: &'a Token<'a>) -> Result> { - let mut split = s.split(".").peekable(); - - match split.next() { - Some(token) if token.value() == "_vectors" => (), - Some(token) => return Err(InvalidPrefix(token)), - None => return Err(EmptyFilter), - } - - let embedder_name = split.next(); - - let mut fragment_tokens = None; - if split.peek().map(|t| t.value()) == Some("fragments") { - let token = split.next().expect("it was peeked before"); - let name = split.next().ok_or_else(|| MissingFragmentName(token.clone()))?; - - fragment_tokens = Some((token, name)); - } - - let mut remaining_tokens = split.collect::>(); - - let mut user_provided_token = None; - if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "userProvided") { - user_provided_token = Some(remaining_tokens.remove(position)); - } - - let mut document_template_token = None; - if let Some(position) = - remaining_tokens.iter().position(|t| t.value() == "documentTemplate") - { - document_template_token = Some(remaining_tokens.remove(position)); - } - - let mut regenerate_token = None; - if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "regenerate") { - regenerate_token = Some(remaining_tokens.remove(position)); - } - - if !remaining_tokens.is_empty() { - return Err(LeftoverToken(remaining_tokens.remove(0))); - } - - let inner = - match (fragment_tokens, user_provided_token, document_template_token, regenerate_token) - { - (Some((_token, name)), None, None, None) => VectorFilterInner::Fragment(name), - (None, Some(_), None, None) => VectorFilterInner::UserProvided, - (None, None, Some(_), None) => VectorFilterInner::DocumentTemplate, - (None, None, None, Some(_)) => 
VectorFilterInner::Regenerate, - (None, None, None, None) => VectorFilterInner::None, - (a, b, c, d) => { - let a = a.map(|(token, _)| token); - let present = [a, b, c, d].into_iter().flatten().collect(); - return Err(ExclusiveOptions(present)); - } - }; - - Ok(Self { inner, embedder: embedder_name }) - } - - pub(super) fn evaluate( - self, - rtxn: &heed::RoTxn<'_>, - index: &Index, - universe: Option<&RoaringBitmap>, - ) -> crate::Result { - let index_embedding_configs = index.embedding_configs(); - let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?; - - let embedders = match self.embedder { - Some(embedder) => vec![embedder], - None => { - embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect() - } - }; - - let mut docids = embedders - .iter() - .map(|e| self.inner.evaluate(rtxn, index, e, &embedding_configs)) - .union()?; - - if let Some(universe) = universe { - docids &= universe; - } - - Ok(docids) - } + Ok(docids) } -impl VectorFilterInner<'_> { - fn evaluate( - &self, - rtxn: &heed::RoTxn<'_>, - index: &Index, - embedder: &Token<'_>, - embedding_configs: &[IndexEmbeddingConfig], - ) -> crate::Result { - let embedder_name = embedder.value(); - let available_embedders = - || embedding_configs.iter().map(|c| c.name.clone()).collect::>(); +fn evaluate_inner( + rtxn: &heed::RoTxn<'_>, + index: &Index, + embedder: &Token<'_>, + embedding_configs: &[IndexEmbeddingConfig], + filter: &VectorFilter<'_>, +) -> crate::Result { + let embedder_name = embedder.value(); + let available_embedders = + || embedding_configs.iter().map(|c| c.name.clone()).collect::>(); - let embedding_config = embedding_configs - .iter() - .find(|config| config.name == embedder_name) - .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; + let embedding_config = embedding_configs + .iter() + .find(|config| config.name == embedder_name) + .ok_or_else(|| EmbedderDoesNotExist { embedder, available: 
available_embedders() })?; - let embedder_info = index - .embedding_configs() - .embedder_info(rtxn, embedder_name)? - .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; + let embedder_info = index + .embedding_configs() + .embedder_info(rtxn, embedder_name)? + .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?; - let arroy_wrapper = ArroyWrapper::new( - index.vector_arroy, - embedder_info.embedder_id, - embedding_config.config.quantized(), - ); + let arroy_wrapper = ArroyWrapper::new( + index.vector_arroy, + embedder_info.embedder_id, + embedding_config.config.quantized(), + ); - let docids = match self { - VectorFilterInner::Fragment(fragment) => { - let fragment_name = fragment.value(); - let fragment_config = embedding_config - .fragments - .as_slice() - .iter() - .find(|fragment| fragment.name == fragment_name) - .ok_or_else(|| FragmentDoesNotExist { - embedder, - fragment, - available: embedding_config - .fragments - .as_slice() - .iter() - .map(|f| f.name.clone()) - .collect(), - })?; + let docids = match filter { + VectorFilter::Fragment(fragment) => { + let fragment_name = fragment.value(); + let fragment_config = embedding_config + .fragments + .as_slice() + .iter() + .find(|fragment| fragment.name == fragment_name) + .ok_or_else(|| FragmentDoesNotExist { + embedder, + fragment, + available: embedding_config + .fragments + .as_slice() + .iter() + .map(|f| f.name.clone()) + .collect(), + })?; - arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? + arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? 
+ } + VectorFilter::DocumentTemplate => { + if !embedding_config.fragments.as_slice().is_empty() { + return Ok(RoaringBitmap::new()); } - VectorFilterInner::DocumentTemplate => { - if !embedding_config.fragments.as_slice().is_empty() { - return Ok(RoaringBitmap::new()); - } - let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; - stats.documents - user_provided_docsids.clone() - } - VectorFilterInner::UserProvided => { - let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); - user_provided_docsids.clone() - } - VectorFilterInner::Regenerate => { - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; - let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids(); - stats.documents - skip_regenerate - } - VectorFilterInner::None => { - let mut stats = ArroyStats::default(); - arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; - stats.documents - } - }; + let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); + let mut stats = ArroyStats::default(); + arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + stats.documents - user_provided_docsids.clone() + } + VectorFilter::UserProvided => { + let user_provided_docsids = embedder_info.embedding_status.user_provided_docids(); + user_provided_docsids.clone() + } + VectorFilter::Regenerate => { + let mut stats = ArroyStats::default(); + arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids(); + stats.documents - skip_regenerate + } + VectorFilter::None => { + let mut stats = ArroyStats::default(); + arroy_wrapper.aggregate_stats(rtxn, &mut stats)?; + stats.documents + } + }; - Ok(docids) - } + Ok(docids) }