Merge branch 'fragment-filters' into render-route

2025-09-06 12:46:31 +00:00 · 2025-08-01 09:13:17 +02:00
parent 99b4dce8ae be045a7636
commit dae4fa874c
19 changed files with 920 additions and 30 deletions
--- a/crates/filter-parser/src/condition.rs
+++ b/crates/filter-parser/src/condition.rs
@ -7,11 +7,20 @@

 use nom::branch::alt;
 use nom::bytes::complete::tag;
+use nom::character::complete::char;
+use nom::character::complete::multispace0;
 use nom::character::complete::multispace1;
 use nom::combinator::cut;
+use nom::combinator::map;
+use nom::combinator::value;
+use nom::sequence::preceded;
 use nom::sequence::{terminated, tuple};
 use Condition::*;

+use crate::error::IResultExt;
+use crate::value::parse_vector_value;
+use crate::ErrorKind;
+use crate::VectorFilter;
 use crate::{parse_value, FilterCondition, IResult, Span, Token};

 #[derive(Debug, Clone, PartialEq, Eq)]
@ -113,6 +122,58 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
 }

+/// Parses the `_vectors` selector that precedes an `EXISTS` / `NOT EXISTS` operator.
+///
+/// Shapes accepted by the code below:
+/// - `_vectors` followed by at least one whitespace;
+/// - `_vectors.<embedder>`, optionally followed by `.fragments.<fragment>`, `.userProvided`,
+///   `.documentTemplate` or `.regenerate`, then at least one whitespace.
+///
+/// Leading whitespace is skipped and the trailing whitespace is consumed.
+///
+/// On success, returns the `_vectors` token, the embedder name token (`None` for the bare
+/// `_vectors` form) and the parsed `VectorFilter`.
+///
+/// Failures that happen before the `.` following `_vectors` has been consumed are propagated
+/// as returned by the underlying nom parsers. Past that point, the embedder, fragment and
+/// trailing-whitespace steps report their failures through `map_cut` with a dedicated `ErrorKind`.
+fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
+    let (input, _) = multispace0(input)?;
+    let (input, fid) = tag("_vectors")(input)?;
+
+    // Bare `_vectors` followed by whitespace: there is no embedder and no sub-filter.
+    if let Ok((input, _)) = multispace1::<_, crate::Error>(input) {
+        return Ok((input, (Token::from(fid), None, VectorFilter::None)));
+    }
+
+    let (input, _) = char('.')(input)?;
+
+    // From this point, we are certain this is a vector filter, so our errors must be final.
+    // We could use nom's `cut`, but it's better to be explicit about the errors.
+
+    let (input, embedder_name) =
+        parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidEmbedder)?;
+
+    // Optional suffix after the embedder name. The last alternative (`success("")`) consumes
+    // nothing and always matches, so a missing suffix yields `VectorFilter::None`.
+    let (input, filter) = alt((
+        map(
+            // Once `.fragments` has matched, a `.` and a fragment name are mandatory.
+            preceded(tag(".fragments"), |input| {
+                let (input, _) = tag(".")(input).map_cut(ErrorKind::VectorFilterMissingFragment)?;
+                parse_vector_value(input).map_cut(ErrorKind::VectorFilterInvalidFragment)
+            }),
+            VectorFilter::Fragment,
+        ),
+        value(VectorFilter::UserProvided, tag(".userProvided")),
+        value(VectorFilter::DocumentTemplate, tag(".documentTemplate")),
+        value(VectorFilter::Regenerate, tag(".regenerate")),
+        value(VectorFilter::None, nom::combinator::success("")),
+    ))(input)?;
+
+    // Whatever follows the selector must start with whitespace; anything else is reported
+    // as leftover tokens.
+    let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?;
+
+    Ok((input, (Token::from(fid), Some(embedder_name), filter)))
+}
+
+/// vectors_exists          = vectors "EXISTS"
+///
+/// Parses a vector selector (see `parse_vectors`) immediately followed by the `EXISTS` keyword
+/// and wraps the result in a `FilterCondition::VectorExists`.
+pub fn parse_vectors_exists(input: Span) -> IResult<FilterCondition> {
+    // `parse_vectors` already consumed the whitespace separating the selector from `EXISTS`.
+    let (input, (fid, embedder, filter)) = terminated(parse_vectors, tag("EXISTS"))(input)?;
+
+    Ok((input, FilterCondition::VectorExists { fid, embedder, filter }))
+}
+/// vectors_not_exists      = vectors "NOT" WS+ "EXISTS"
+///
+/// Parses a vector selector (see `parse_vectors`) followed by `NOT`, at least one whitespace
+/// and `EXISTS`. The result is a `FilterCondition::VectorExists` wrapped in
+/// `FilterCondition::Not`.
+pub fn parse_vectors_not_exists(input: Span) -> IResult<FilterCondition> {
+    let (input, (fid, embedder, filter)) = parse_vectors(input)?;
+
+    let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?;
+    Ok((
+        input,
+        FilterCondition::Not(Box::new(FilterCondition::VectorExists { fid, embedder, filter })),
+    ))
+}
+
 /// contains        = value "CONTAINS" value
 pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
    let (input, (fid, contains, value)) =
--- a/crates/filter-parser/src/error.rs
+++ b/crates/filter-parser/src/error.rs
@ -42,6 +42,23 @@ pub fn cut_with_err<'a, O>(
    }
 }

+/// Extension trait adding `map_cut` to parser results.
+pub trait IResultExt<'a> {
+    /// Consumes the result and returns a value of the same type, using `kind` to describe
+    /// the error case. The exact conversion is defined by each implementation.
+    fn map_cut(self, kind: ErrorKind<'a>) -> Self;
+}
+
+impl<'a, T> IResultExt<'a> for IResult<'a, T> {
+    /// Leaves `Ok` values untouched and rewrites errors as follows:
+    /// - `nom::Err::Incomplete` is returned unchanged;
+    /// - `nom::Err::Error` and `nom::Err::Failure` are both replaced by a `nom::Err::Failure`
+    ///   built from `kind`, located at the context span of the original error.
+    fn map_cut(self, kind: ErrorKind<'a>) -> Self {
+        self.map_err(move |e: nom::Err<Error<'a>>| {
+            // Keep only the position of the original error; its kind is discarded.
+            let input = match e {
+                nom::Err::Incomplete(_) => return e,
+                nom::Err::Error(e) => *e.context(),
+                nom::Err::Failure(e) => *e.context(),
+            };
+            nom::Err::Failure(Error::new_from_kind(input, kind))
+        })
+    }
+}
+
 #[derive(Debug)]
 pub struct Error<'a> {
    context: Span<'a>,
@ -61,6 +78,10 @@ pub enum ErrorKind<'a> {
    GeoBoundingBox,
    MisusedGeoRadius,
    MisusedGeoBoundingBox,
+    VectorFilterLeftover,
+    VectorFilterInvalidEmbedder,
+    VectorFilterMissingFragment,
+    VectorFilterInvalidFragment,
    InvalidPrimary,
    InvalidEscapedNumber,
    ExpectedEof,
@ -169,6 +190,18 @@ impl Display for Error<'_> {
            ErrorKind::MisusedGeoBoundingBox => {
                writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")?
            }
+            ErrorKind::VectorFilterLeftover => {
+                writeln!(f, "The vector filter has leftover tokens.")?
+            }
+            ErrorKind::VectorFilterInvalidFragment => {
+                writeln!(f, "The vector filter's fragment is invalid.")?
+            }
+            ErrorKind::VectorFilterMissingFragment => {
+                writeln!(f, "The vector filter is missing a fragment name.")?
+            }
+            ErrorKind::VectorFilterInvalidEmbedder => {
+                writeln!(f, "The vector filter's embedder is invalid.")?
+            }
            ErrorKind::ReservedKeyword(word) => {
                writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
            }
--- a/crates/filter-parser/src/lib.rs
+++ b/crates/filter-parser/src/lib.rs
@ -65,6 +65,9 @@ use nom_locate::LocatedSpan;
 pub(crate) use value::parse_value;
 use value::word_exact;

+use crate::condition::{parse_vectors_exists, parse_vectors_not_exists};
+use crate::error::IResultExt;
+
 pub type Span<'a> = LocatedSpan<&'a str, &'a str>;

 type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
@ -136,6 +139,15 @@ impl<'a> From<&'a str> for Token<'a> {
    }
 }

+/// Optional sub-selector that can follow `_vectors.<embedder>` in a vector `EXISTS` filter.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum VectorFilter<'a> {
+    /// Written `.fragments.<name>`; carries the fragment name token.
+    Fragment(Token<'a>),
+    /// Written `.documentTemplate`.
+    DocumentTemplate,
+    /// Written `.userProvided`.
+    UserProvided,
+    /// Written `.regenerate`.
+    Regenerate,
+    /// No sub-selector was written.
+    None,
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum FilterCondition<'a> {
    Not(Box<Self>),
@ -143,6 +155,7 @@ pub enum FilterCondition<'a> {
    In { fid: Token<'a>, els: Vec<Token<'a>> },
    Or(Vec<Self>),
    And(Vec<Self>),
+    VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
    GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
    GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
 }
@ -173,9 +186,24 @@ impl<'a> FilterCondition<'a> {
            FilterCondition::Or(seq) | FilterCondition::And(seq) => {
                seq.iter().find_map(|filter| filter.use_contains_operator())
            }
+            FilterCondition::VectorExists { .. }
+            | FilterCondition::GeoLowerThan { .. }
+            | FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::In { .. } => None,
+        }
+    }
+
+    /// Returns the `fid` token of the first `VectorExists` condition found in this filter,
+    /// or `None` when the filter contains none.
+    ///
+    /// The search descends through `Not`, `Or` and `And`; inside `Or`/`And` the sub-filters
+    /// are visited in order and the first match wins.
+    pub fn use_vector_filter(&self) -> Option<&Token> {
+        match self {
+            FilterCondition::Condition { .. } => None,
+            FilterCondition::Not(this) => this.use_vector_filter(),
+            FilterCondition::Or(seq) | FilterCondition::And(seq) => {
+                seq.iter().find_map(|filter| filter.use_vector_filter())
+            }
             FilterCondition::GeoLowerThan { .. }
             | FilterCondition::GeoBoundingBox { .. }
             | FilterCondition::In { .. } => None,
+            FilterCondition::VectorExists { fid, .. } => Some(fid),
         }
     }

@ -263,10 +291,7 @@ fn parse_in_body(input: Span) -> IResult<Vec<Token>> {
    let (input, _) = ws(word_exact("IN"))(input)?;

    // everything after `IN` can be a failure
-    let (input, _) =
-        cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))(
-            input,
-        )?;
+    let (input, _) = tag("[")(input).map_cut(ErrorKind::InOpeningBracket)?;

    let (input, content) = cut(parse_value_list)(input)?;

@ -500,8 +525,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
        parse_is_not_null,
        parse_is_empty,
        parse_is_not_empty,
-        parse_exists,
-        parse_not_exists,
+        alt((parse_vectors_exists, parse_vectors_not_exists, parse_exists, parse_not_exists)),
        parse_to,
        parse_contains,
        parse_not_contains,
@ -557,6 +581,22 @@ impl std::fmt::Display for FilterCondition<'_> {
                }
                write!(f, "]")
            }
+            FilterCondition::VectorExists { fid: _, embedder, filter: inner } => {
+                write!(f, "_vectors")?;
+                if let Some(embedder) = embedder {
+                    write!(f, ".{:?}", embedder.value())?;
+                }
+                match inner {
+                    VectorFilter::Fragment(fragment) => {
+                        write!(f, ".fragments.{:?}", fragment.value())?
+                    }
+                    VectorFilter::DocumentTemplate => write!(f, ".documentTemplate")?,
+                    VectorFilter::UserProvided => write!(f, ".userProvided")?,
+                    VectorFilter::Regenerate => write!(f, ".regenerate")?,
+                    VectorFilter::None => (),
+                }
+                write!(f, " EXISTS")
+            }
            FilterCondition::GeoLowerThan { point, radius } => {
                write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
            }
@ -630,6 +670,9 @@ pub mod tests {
        insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
        // but it also works with other sequences
        insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
+
+        insta::assert_snapshot!(p(r#"_vectors." valid.name  ".fragments."also.. valid! " EXISTS"#), @r#"_vectors." valid.name  ".fragments."also.. valid! " EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.\"\n\t\r\\\"\" EXISTS"), @r#"_vectors."\n\t\r\"" EXISTS"#);
    }

    #[test]
@ -692,6 +735,18 @@ pub mod tests {
        insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
        insta::assert_snapshot!(p("subscribers  IS   NOT   EMPTY"), @"NOT ({subscribers} IS EMPTY)");

+        // Test _vectors EXISTS + _vectors NOT EXISTS
+        insta::assert_snapshot!(p("_vectors EXISTS"), @"_vectors EXISTS");
+        insta::assert_snapshot!(p("_vectors.embedderName EXISTS"), @r#"_vectors."embedderName" EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.documentTemplate EXISTS"), @r#"_vectors."embedderName".documentTemplate EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.regenerate EXISTS"), @r#"_vectors."embedderName".regenerate EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.regenerate EXISTS"), @r#"_vectors."embedderName".regenerate EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.fragments.fragmentName EXISTS"), @r#"_vectors."embedderName".fragments."fragmentName" EXISTS"#);
+        insta::assert_snapshot!(p("  _vectors.embedderName.fragments.fragmentName   EXISTS"), @r#"_vectors."embedderName".fragments."fragmentName" EXISTS"#);
+        insta::assert_snapshot!(p("NOT _vectors EXISTS"), @"NOT (_vectors EXISTS)");
+        insta::assert_snapshot!(p(" NOT  _vectors   EXISTS"), @"NOT (_vectors EXISTS)");
+        insta::assert_snapshot!(p("  _vectors  NOT  EXISTS"), @"NOT (_vectors EXISTS)");
+
        // Test EXISTS + NOT EXITS
        insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
        insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
@ -946,6 +1001,59 @@ pub mod tests {
        "###
        );

+        insta::assert_snapshot!(p(r#"_vectors _vectors EXISTS"#), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `_vectors _vectors EXISTS`.
+        1:25 _vectors _vectors EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors. embedderName EXISTS"#), @r"
+        The vector filter's embedder is invalid.
+        10:30 _vectors. embedderName EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors .embedderName EXISTS"#), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `_vectors .embedderName EXISTS`.
+        1:30 _vectors .embedderName EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName. EXISTS"#), @r"
+        The vector filter has leftover tokens.
+        22:30 _vectors.embedderName. EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors."embedderName EXISTS"#), @r#"
+        The vector filter's embedder is invalid.
+        30:30 _vectors."embedderName EXISTS
+        "#);
+        insta::assert_snapshot!(p(r#"_vectors."embedderNam"e EXISTS"#), @r#"
+        The vector filter has leftover tokens.
+        23:31 _vectors."embedderNam"e EXISTS
+        "#);
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.documentTemplate. EXISTS"#), @r"
+        The vector filter has leftover tokens.
+        39:47 _vectors.embedderName.documentTemplate. EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments EXISTS"#), @r"
+        The vector filter is missing a fragment name.
+        32:39 _vectors.embedderName.fragments EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments. EXISTS"#), @r"
+        The vector filter's fragment is invalid.
+        33:40 _vectors.embedderName.fragments. EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments.test test EXISTS"#), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `_vectors.embedderName.fragments.test test EXISTS`.
+        1:49 _vectors.embedderName.fragments.test test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments. test EXISTS"#), @r"
+        The vector filter's fragment is invalid.
+        33:45 _vectors.embedderName.fragments. test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName .fragments. test EXISTS"#), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `_vectors.embedderName .fragments. test EXISTS`.
+        1:46 _vectors.embedderName .fragments. test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName .fragments.test EXISTS"#), @r"
+        Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `_vectors.embedderName .fragments.test EXISTS`.
+        1:45 _vectors.embedderName .fragments.test EXISTS
+        ");
+
        insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
        Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
        5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
--- a/crates/filter-parser/src/value.rs
+++ b/crates/filter-parser/src/value.rs
@ -80,6 +80,39 @@ pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a,
    }
 }

+/// vector_value          = ( non_dot_word | singleQuoted | doubleQuoted)
+///
+/// Parses one component of a vector selector (an embedder or fragment name): either a quoted
+/// string or an unquoted word that contains no `.`. No surrounding whitespace is consumed.
+///
+/// The parsed text is then unescaped with `unescaper::unescape`:
+/// - when unescaping changed the length, the token carries the unescaped content;
+/// - otherwise the token is returned as parsed.
+///
+/// Unescaping errors are mapped to `nom::Err::Incomplete` (incomplete string),
+/// `ErrorKind::InvalidEscapedNumber` (integer parse error) or `ErrorKind::MalformedValue`
+/// (invalid character).
+pub fn parse_vector_value(input: Span) -> IResult<Token> {
+    /// Takes one or more value-component characters, stopping at the first `.`.
+    pub fn non_dot_word(input: Span) -> IResult<Token> {
+        let (input, word) = take_while1(|c| is_value_component(c) && c != '.')(input)?;
+        Ok((input, word.into()))
+    }
+
+    // Quoted forms are tried first. Once an opening quote has matched, the content and the
+    // closing quote are wrapped in `cut`, so their failures are not retried as another form.
+    let (input, value) = alt((
+        delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
+        delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
+        non_dot_word,
+    ))(input)?;
+
+    match unescaper::unescape(value.value()) {
+        Ok(content) => {
+            // A length difference is used as the signal that unescaping changed the content.
+            if content.len() != value.value().len() {
+                Ok((input, Token::new(value.original_span(), Some(content))))
+            } else {
+                Ok((input, value))
+            }
+        }
+        Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
+        Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::InvalidEscapedNumber,
+        ))),
+        Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::MalformedValue,
+        ))),
+    }
+}
+
 /// value          = WS* ( word | singleQuoted | doubleQuoted) WS+
 pub fn parse_value(input: Span) -> IResult<Token> {
    // to get better diagnostic message we are going to strip the left whitespaces from the input right now
--- a/crates/index-scheduler/src/scheduler/test_document_addition.rs
+++ b/crates/index-scheduler/src/scheduler/test_document_addition.rs
@ -736,7 +736,7 @@ fn test_document_addition_mixed_rights_with_index() {
 #[test]
 fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
    // We're going to autobatch multiple document addition.
-    // - The index does not exists
+    // - The index does not exist
    // - The first document addition don't have the right to create an index
    // - The second do. They should not batch together.
    // - The second should batch with everything else as it's going to create an index.
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@ -139,6 +139,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
    per_document_id: bool,
    // if a filter was used
    per_filter: bool,
+    with_vector_filter: bool,
+
    // if documents were sorted
    sort: bool,

@ -166,6 +168,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
        Box::new(Self {
            per_document_id: self.per_document_id | new.per_document_id,
            per_filter: self.per_filter | new.per_filter,
+            with_vector_filter: self.with_vector_filter | new.with_vector_filter,
            sort: self.sort | new.sort,
            retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
            max_limit: self.max_limit.max(new.max_limit),
@ -250,6 +253,7 @@ pub async fn get_document(
            retrieve_vectors: param_retrieve_vectors.0,
            per_document_id: true,
            per_filter: false,
+            with_vector_filter: false,
            sort: false,
            max_limit: 0,
            max_offset: 0,
@ -475,6 +479,10 @@ pub async fn documents_by_query_post(
    analytics.publish(
        DocumentsFetchAggregator::<DocumentsPOST> {
            per_filter: body.filter.is_some(),
+            with_vector_filter: body
+                .filter
+                .as_ref()
+                .is_some_and(|f| f.to_string().contains("_vectors")),
            sort: body.sort.is_some(),
            retrieve_vectors: body.retrieve_vectors,
            max_limit: body.limit,
@ -576,6 +584,10 @@ pub async fn get_documents(
    analytics.publish(
        DocumentsFetchAggregator::<DocumentsGET> {
            per_filter: query.filter.is_some(),
+            with_vector_filter: query
+                .filter
+                .as_ref()
+                .is_some_and(|f| f.to_string().contains("_vectors")),
            sort: query.sort.is_some(),
            retrieve_vectors: query.retrieve_vectors,
            max_limit: query.limit,
--- a/crates/meilisearch/src/routes/indexes/search_analytics.rs
+++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs
@ -40,6 +40,7 @@ pub struct SearchAggregator<Method: AggregateMethod> {
    // filter
    filter_with_geo_radius: bool,
    filter_with_geo_bounding_box: bool,
+    filter_on_vectors: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
@ -163,6 +164,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
            let stringified_filters = filter.to_string();
            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
+            ret.filter_on_vectors = stringified_filters.contains("_vectors");
            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
        }

@ -260,6 +262,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
            distinct,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
+            filter_on_vectors,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
@ -314,6 +317,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
        // filter
        self.filter_with_geo_radius |= filter_with_geo_radius;
        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
+        self.filter_on_vectors |= filter_on_vectors;
        self.filter_sum_of_criteria_terms =
            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
        self.filter_total_number_of_criteria =
@ -388,6 +392,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
            distinct,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
+            filter_on_vectors,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
@ -445,6 +450,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
            "filter": {
               "with_geoRadius": filter_with_geo_radius,
               "with_geoBoundingBox": filter_with_geo_bounding_box,
+               "on_vectors": filter_on_vectors,
               "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
               "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
            },
--- a/crates/meilisearch/src/routes/tasks.rs
+++ b/crates/meilisearch/src/routes/tasks.rs
@ -336,7 +336,7 @@ impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
@ -430,7 +430,7 @@ async fn cancel_tasks(
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
@ -611,7 +611,7 @@ async fn get_tasks(
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
@ -665,7 +665,7 @@ async fn get_task(
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@ -2081,7 +2081,7 @@ pub(crate) fn parse_filter(
    })?;

    if let Some(ref filter) = filter {
-        // If the contains operator is used while the contains filter features is not enabled, errors out
+        // If the contains operator is used while the contains filter feature is not enabled, errors out
        if let Some((token, error)) =
            filter.use_contains_operator().zip(features.check_contains_filter().err())
        {
@ -2092,6 +2092,18 @@ pub(crate) fn parse_filter(
        }
    }

+    if let Some(ref filter) = filter {
+        // If a vector filter is used while the multi modal feature is not enabled, errors out
+        if let Some((token, error)) =
+            filter.use_vector_filter().zip(features.check_multimodal("using a vector filter").err())
+        {
+            return Err(ResponseError::from_msg(
+                token.as_external_error(error).to_string(),
+                Code::FeatureNotEnabled,
+            ));
+        }
+    }
+
    Ok(filter)
 }

--- a/crates/meilisearch/tests/documents/errors.rs
+++ b/crates/meilisearch/tests/documents/errors.rs
@ -557,7 +557,7 @@ async fn delete_document_by_filter() {
    "###);

    let index = shared_does_not_exists_index().await;
-    // index does not exists
+    // index does not exist
    let (response, _code) =
        index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"})).await;
    snapshot!(response, @r###"
--- a/crates/meilisearch/tests/search/errors.rs
+++ b/crates/meilisearch/tests/search/errors.rs
@ -304,7 +304,7 @@ async fn search_bad_filter() {
    let server = Server::new_shared();
    let index = server.unique_index();
    // Also, to trigger the error message we need to effectively create the index or else it'll throw an
-    // index does not exists error.
+    // index does not exist error.
    let (response, _code) = index.create(None).await;
    server.wait_task(response.uid()).await.succeeded();

@ -1263,7 +1263,7 @@ async fn search_with_contains_without_enabling_the_feature() {
    let server = Server::new_shared();
    let index = server.unique_index();
    // Also, to trigger the error message we need to effectively create the index or else it'll throw an
-    // index does not exists error.
+    // index does not exist error.
    let (task, _code) = index.create(None).await;
    server.wait_task(task.uid()).await.succeeded();

--- a/crates/meilisearch/tests/search/filters.rs
+++ b/crates/meilisearch/tests/search/filters.rs
@ -4,10 +4,11 @@ use tempfile::TempDir;

 use super::test_settings_documents_indexing_swapping_and_search;
 use crate::common::{
-    default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
-    DOCUMENTS, NESTED_DOCUMENTS,
+    default_settings, shared_index_for_fragments, shared_index_with_documents,
+    shared_index_with_nested_documents, Server, DOCUMENTS, NESTED_DOCUMENTS,
 };
 use crate::json;
+use crate::vector::rest::create_mock;

 #[actix_rt::test]
 async fn search_with_filter_string_notation() {
@ -731,3 +732,467 @@ async fn test_filterable_attributes_priority() {
    )
    .await;
 }
+
+/// A bare `_vectors EXISTS` filter, with no embedder named, matches four documents.
+#[actix_rt::test]
+async fn vector_filter_all_embedders() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "echo"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 4
+    }
+    "#);
+}
+
+/// `_vectors.<embedder>.fragments` without a fragment name after it is rejected as an
+/// `invalid_search_filter`.
+#[actix_rt::test]
+async fn vector_filter_missing_fragment() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest.fragments EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "message": "The vector filter is missing a fragment name.\n24:31 _vectors.rest.fragments EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+/// Naming an embedder that is not configured on the index is rejected, and the error lists the
+/// embedders that are available.
+#[actix_rt::test]
+async fn vector_filter_nonexistent_embedder() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.other EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+/// `_vectors.userProvided` does not mean "user-provided vectors on any embedder": it is
+/// rejected because no embedder is called `userProvided`.
+#[actix_rt::test]
+async fn vector_filter_all_embedders_user_provided() {
+    let index = shared_index_for_fragments().await;
+
+    // Counterintuitive: the segment right after `_vectors.` is always read as an embedder name,
+    // so `userProvided` here is looked up as an embedder and the request fails with the same
+    // "embedder does not exist" error as a filter naming any other unknown embedder.
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.userProvided EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+/// `_vectors.rest EXISTS` matches the four documents that have vectors for the `rest` embedder.
+#[actix_rt::test]
+async fn vector_filter_specific_embedder() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "echo"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 4
+    }
+    "#);
+}
+
+/// `_vectors.rest.userProvided EXISTS` matches only `echo`.
+#[actix_rt::test]
+async fn vector_filter_user_provided() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest.userProvided EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "echo"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 1
+    }
+    "#);
+}
+
+/// Filtering on a named fragment only matches the documents embedded through that fragment:
+/// `withBreed` matches two documents, `basic` matches three.
+#[actix_rt::test]
+async fn vector_filter_specific_fragment() {
+    let index = shared_index_for_fragments().await;
+
+    // Fragment `withBreed`.
+    let with_breed_query = json!({
+        "filter": "_vectors.rest.fragments.withBreed EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+    let (response, _status) = index.search_post(with_breed_query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 2
+    }
+    "#);
+
+    // Fragment `basic`.
+    let basic_query = json!({
+        "filter": "_vectors.rest.fragments.basic EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+    let (response, _status) = index.search_post(basic_query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+/// Naming a fragment that is not configured on the embedder is rejected, and the error lists
+/// the fragments that are available.
+// NOTE(review): the name misspells "nonexistent" (cf. `vector_filter_nonexistent_embedder`);
+// it is left untouched so anything selecting this test by name keeps working.
+#[actix_rt::test]
+async fn vector_filter_non_existant_fragment() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments.other EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "Index `[uuid]`: The fragment `other` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`.\n25:30 _vectors.rest.fragments.other EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+/// Nothing may follow a fragment name: a trailing `.userProvided` is reported as leftover
+/// tokens.
+#[actix_rt::test]
+async fn vector_filter_specific_fragment_user_provided() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest.fragments.other.userProvided EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "message": "The vector filter has leftover tokens.\n30:50 _vectors.rest.fragments.other.userProvided EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+/// On the fragments index, `_vectors.rest.documentTemplate EXISTS` is accepted but matches no
+/// document.
+#[actix_rt::test]
+async fn vector_filter_document_template_but_fragments_used() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest.documentTemplate EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0
+    }
+    "#);
+}
+
+/// `_vectors.rest.documentTemplate EXISTS` matches the documents that did not bring their own
+/// `_vectors.rest` value: `echo`, which does, is excluded from the hits.
+#[actix_rt::test]
+async fn vector_filter_document_template() {
+    // `create_mock` supplies the settings used for the `rest` embedder below. `_mock` is kept
+    // bound for the whole test (presumably the mock server handle; confirm in `vector::rest`).
+    let (_mock, setting) = create_mock().await;
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    // Vector filters are gated behind the `multimodal` experimental feature.
+    let (_response, code) = server.set_features(json!({"multimodal": true})).await;
+    snapshot!(code, @"200 OK");
+
+    let (response, code) = index
+        .update_settings(json!({
+            "embedders": {
+                "rest": setting,
+            },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // Only `echo` provides its own vector for `rest`.
+    let documents = json!([
+        {"id": 0, "name": "kefir"},
+        {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
+        {"id": 2, "name": "intel"},
+        {"id": 3, "name": "iko" }
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(value.uid()).await.succeeded();
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.documentTemplate EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "iko"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+/// On the plain shared documents index a vector filter is refused with `feature_not_enabled`,
+/// because the `multimodal` experimental feature is required.
+#[actix_rt::test]
+async fn vector_filter_feature_gate() {
+    let index = shared_index_with_documents().await;
+    let query = json!({
+        "filter": "_vectors EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "message": "using a vector filter requires enabling the `multimodal` experimental feature. See https://github.com/orgs/meilisearch/discussions/846\n1:9 _vectors EXISTS",
+      "code": "feature_not_enabled",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+    }
+    "#);
+}
+
+/// `NOT EXISTS` inverts a vector filter: every hit except `echo` is returned for
+/// `_vectors.rest.userProvided NOT EXISTS`.
+#[actix_rt::test]
+async fn vector_filter_negation() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest.userProvided NOT EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+/// Two vector filters joined with `OR` return the union of their matches.
+#[actix_rt::test]
+async fn vector_filter_or_combination() {
+    let index = shared_index_for_fragments().await;
+    let query = json!({
+        "filter": "_vectors.rest.fragments.withBreed EXISTS OR _vectors.rest.userProvided EXISTS",
+        "attributesToRetrieve": ["name"]
+    });
+
+    let (response, _status) = index.search_post(query).await;
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "name": "echo"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+/// `_vectors.<embedder>.regenerate EXISTS` matches three documents (`echo` is not among
+/// them), while `.regenerate` after a fragment name is rejected as leftover tokens.
+#[actix_rt::test]
+async fn vector_filter_regenerate() {
+    let index = shared_index_for_fragments().await;
+
+    // The filters are plain literals: wrapping them in `format!` with no arguments would only
+    // allocate a `String` for nothing.
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.regenerate EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments.basic.regenerate EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "The vector filter has leftover tokens.\n30:48 _vectors.rest.fragments.basic.regenerate EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
--- a/crates/milli/src/filterable_attributes_rules.rs
+++ b/crates/milli/src/filterable_attributes_rules.rs
@ -111,7 +111,7 @@ impl FilterableAttributesFeatures {
        self.filter.is_filterable_null()
    }

-    /// Check if `IS EXISTS` is allowed
+    /// Check if `EXISTS` is allowed
    pub fn is_filterable_exists(&self) -> bool {
        self.filter.is_filterable_exists()
    }
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@ -11,7 +11,7 @@ use roaring::{MultiOps, RoaringBitmap};
 use serde_json::Value;

 use super::facet_range_search;
-use crate::constants::RESERVED_GEO_FIELD_NAME;
+use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
 use crate::error::{Error, UserError};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::heed_codec::facet::{
@ -228,6 +228,10 @@ impl<'a> Filter<'a> {
    pub fn use_contains_operator(&self) -> Option<&Token> {
        self.condition.use_contains_operator()
    }
+
+    /// Forwards to the condition's `use_vector_filter` and returns the token it reports, if any.
+    ///
+    /// NOTE(review): presumably the token of a `_vectors` filter found in the condition,
+    /// mirroring `use_contains_operator`, so callers can feature-gate vector filters. Confirm
+    /// against `FilterCondition::use_vector_filter`.
+    pub fn use_vector_filter(&self) -> Option<&Token> {
+        self.condition.use_vector_filter()
+    }
 }

 impl<'a> Filter<'a> {
@ -235,10 +239,12 @@ impl<'a> Filter<'a> {
        // to avoid doing this for each recursive call we're going to do it ONCE ahead of time
        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
+
        for fid in self.condition.fids(MAX_FILTER_DEPTH) {
            let attribute = fid.value();
            if matching_features(attribute, &filterable_attributes_rules)
                .is_some_and(|(_, features)| features.is_filterable())
+                || attribute == RESERVED_VECTORS_FIELD_NAME
            {
                continue;
            }
@ -542,7 +548,8 @@ impl<'a> Filter<'a> {
                    .union()
            }
            FilterCondition::Condition { fid, op } => {
-                let Some(field_id) = field_ids_map.id(fid.value()) else {
+                let value = fid.value();
+                let Some(field_id) = field_ids_map.id(value) else {
                    return Ok(RoaringBitmap::new());
                };
                let Some((rule_index, features)) =
@ -599,6 +606,9 @@ impl<'a> Filter<'a> {
                    Ok(RoaringBitmap::new())
                }
            }
+            FilterCondition::VectorExists { fid: _, embedder, filter } => {
+                super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
+            }
            FilterCondition::GeoLowerThan { point, radius } => {
                if index.is_geo_filtering_enabled(rtxn)? {
                    let base_point: [f64; 2] =
--- a/crates/milli/src/search/facet/filter_vector.rs
+++ b/crates/milli/src/search/facet/filter_vector.rs
@ -0,0 +1,155 @@
+use filter_parser::{Token, VectorFilter};
+use roaring::{MultiOps, RoaringBitmap};
+
+use crate::error::Error;
+use crate::vector::db::IndexEmbeddingConfig;
+use crate::vector::{ArroyStats, ArroyWrapper};
+use crate::Index;
+
+/// Errors raised when a `_vectors` filter names an embedder or a fragment that cannot be
+/// resolved while evaluating the filter.
+///
+/// Each variant keeps the offending [`Token`]; the conversion into the crate-level [`Error`]
+/// reports the error on that token.
+#[derive(Debug, thiserror::Error)]
+pub enum VectorFilterError<'a> {
+    /// The embedder named in the filter could not be resolved.
+    ///
+    /// `available` holds the known embedder names in no particular order; the message sorts a
+    /// copy of them when it is rendered, and uses a dedicated sentence when the list is empty.
+    #[error("The embedder `{}` does not exist. {}", embedder.value(), {
+        if available.is_empty() {
+            String::from("This index does not have any configured embedders.")
+        } else {
+            let mut available = available.clone();
+            available.sort_unstable();
+            format!("Available embedders are: {}.", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", "))
+        }
+    })]
+    EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> },
+
+    /// The fragment named in the filter is not configured on the given embedder.
+    ///
+    /// `available` holds the embedder's fragment names in no particular order; the message
+    /// sorts a copy of them when it is rendered, and uses a dedicated sentence when the list
+    /// is empty.
+    #[error("The fragment `{}` does not exist on embedder `{}`. {}", fragment.value(), embedder.value(), {
+        if available.is_empty() {
+            String::from("This embedder does not have any configured fragments.")
+        } else {
+            let mut available = available.clone();
+            available.sort_unstable();
+            format!("Available fragments on this embedder are: {}.", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", "))
+        }
+    })]
+    FragmentDoesNotExist {
+        embedder: &'a Token<'a>,
+        fragment: &'a Token<'a>,
+        available: Vec<String>,
+    },
+}
+
+use VectorFilterError::*;
+
+impl<'a> From<VectorFilterError<'a>> for Error {
+    /// Reports the error on the token that could not be resolved: the embedder token for
+    /// `EmbedderDoesNotExist`, the fragment token for `FragmentDoesNotExist`.
+    fn from(err: VectorFilterError<'a>) -> Self {
+        // Copy the `&'a Token` out first so that `err` itself can be moved into the call below.
+        let offending = match &err {
+            EmbedderDoesNotExist { embedder, .. } => *embedder,
+            FragmentDoesNotExist { fragment, .. } => *fragment,
+        };
+        offending.as_external_error(err).into()
+    }
+}
+
+/// Resolves a `_vectors` filter to the set of matching document ids.
+///
+/// With `embedder` set, only that embedder is inspected. Otherwise `filter` is evaluated for
+/// every embedder configured on the index and the per-embedder results are unioned. When a
+/// `universe` is given, the result is intersected with it.
+///
+/// # Errors
+///
+/// Propagates failures from reading the embedding configs and from `evaluate_inner`.
+pub(super) fn evaluate(
+    rtxn: &heed::RoTxn<'_>,
+    index: &Index,
+    universe: Option<&RoaringBitmap>,
+    embedder: Option<Token<'_>>,
+    filter: &VectorFilter<'_>,
+) -> crate::Result<RoaringBitmap> {
+    let config_store = index.embedding_configs();
+    let configs = config_store.embedding_configs(rtxn)?;
+
+    // No explicit embedder: build one token per configured embedder name.
+    let targets = if let Some(requested) = embedder {
+        vec![requested]
+    } else {
+        configs.iter().map(|config| Token::from(config.name.as_str())).collect()
+    };
+
+    let matched = targets
+        .iter()
+        .map(|target| evaluate_inner(rtxn, index, target, &configs, filter))
+        .union()?;
+
+    Ok(match universe {
+        Some(universe) => matched & universe,
+        None => matched,
+    })
+}
+
+/// Evaluates `filter` for a single embedder and returns the matching document ids.
+///
+/// - `Fragment`: the items stored for that fragment, minus user-provided documents.
+/// - `DocumentTemplate`: empty when the embedder has fragments; otherwise the documents
+///   reported by `aggregate_stats`, minus user-provided documents.
+/// - `UserProvided`: the embedder's user-provided documents.
+/// - `Regenerate`: the documents reported by `aggregate_stats`, minus the skip-regenerate ones.
+/// - `None`: the documents reported by `aggregate_stats`.
+///
+/// # Errors
+///
+/// `EmbedderDoesNotExist` when `embedder` has no config or no stored embedder info,
+/// `FragmentDoesNotExist` when the fragment is not configured on the embedder, plus any error
+/// coming from the underlying stores.
+fn evaluate_inner(
+    rtxn: &heed::RoTxn<'_>,
+    index: &Index,
+    embedder: &Token<'_>,
+    embedding_configs: &[IndexEmbeddingConfig],
+    filter: &VectorFilter<'_>,
+) -> crate::Result<RoaringBitmap> {
+    let embedder_name = embedder.value();
+
+    // Built lazily: the list of names is only collected on the error path.
+    let unknown_embedder = || EmbedderDoesNotExist {
+        embedder,
+        available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
+    };
+
+    let embedding_config = embedding_configs
+        .iter()
+        .find(|config| config.name == embedder_name)
+        .ok_or_else(unknown_embedder)?;
+
+    let embedder_info = index
+        .embedding_configs()
+        .embedder_info(rtxn, embedder_name)?
+        .ok_or_else(unknown_embedder)?;
+
+    let arroy_wrapper = ArroyWrapper::new(
+        index.vector_arroy,
+        embedder_info.embedder_id,
+        embedding_config.config.quantized(),
+    );
+
+    // Document ids accumulated by `aggregate_stats` for this embedder.
+    let docids_with_vectors = || -> crate::Result<RoaringBitmap> {
+        let mut stats = ArroyStats::default();
+        arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
+        Ok(stats.documents)
+    };
+
+    let docids = match filter {
+        VectorFilter::Fragment(fragment) => {
+            let fragment_name = fragment.value();
+            let fragment_config = embedding_config
+                .fragments
+                .as_slice()
+                .iter()
+                .find(|fragment| fragment.name == fragment_name)
+                .ok_or_else(|| FragmentDoesNotExist {
+                    embedder,
+                    fragment,
+                    available: embedding_config
+                        .fragments
+                        .as_slice()
+                        .iter()
+                        .map(|f| f.name.clone())
+                        .collect(),
+                })?;
+
+            let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
+            arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| {
+                bitmap.clone() - user_provided_docids
+            })?
+        }
+        VectorFilter::DocumentTemplate => {
+            // An embedder configured with fragments does not use a document template.
+            if !embedding_config.fragments.as_slice().is_empty() {
+                return Ok(RoaringBitmap::new());
+            }
+
+            // Subtract the user-provided set as-is; cloning it first is not needed.
+            docids_with_vectors()? - embedder_info.embedding_status.user_provided_docids()
+        }
+        VectorFilter::UserProvided => {
+            embedder_info.embedding_status.user_provided_docids().clone()
+        }
+        VectorFilter::Regenerate => {
+            docids_with_vectors()? - embedder_info.embedding_status.skip_regenerate_docids()
+        }
+        VectorFilter::None => docids_with_vectors()?,
+    };
+
+    Ok(docids)
+}
--- a/crates/milli/src/search/facet/mod.rs
+++ b/crates/milli/src/search/facet/mod.rs
@ -17,6 +17,7 @@ mod facet_range_search;
 mod facet_sort_ascending;
 mod facet_sort_descending;
 mod filter;
+mod filter_vector;
 mod search;

 fn facet_extreme_value<'t>(
--- a/crates/milli/src/test_index.rs
+++ b/crates/milli/src/test_index.rs
@ -1338,10 +1338,9 @@ fn vectors_are_never_indexed_as_searchable_or_filterable() {
    assert!(results.candidates.is_empty());

    let mut search = index.search(&rtxn);
-    let results = search
-        .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
-        .execute()
-        .unwrap();
+    let results =
+        dbg!(search.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap()).execute())
+            .unwrap();
    assert!(results.candidates.is_empty());

    index
--- a/crates/milli/src/vector/db.rs
+++ b/crates/milli/src/vector/db.rs
@ -128,6 +128,7 @@ impl EmbeddingStatus {
    pub fn is_user_provided(&self, docid: DocumentId) -> bool {
        self.user_provided.contains(docid)
    }
+
    /// Whether vectors should be regenerated for that document and that embedder.
    pub fn must_regenerate(&self, docid: DocumentId) -> bool {
        let invert = self.skip_regenerate_different_from_user_provided.contains(docid);
--- a/crates/milli/src/vector/mod.rs
+++ b/crates/milli/src/vector/mod.rs
@ -556,9 +556,6 @@ impl ArroyWrapper {
            for reader in self.readers(rtxn, self.quantized_db()) {
                let reader = reader?;
                let documents = reader.item_ids();
-                if documents.is_empty() {
-                    break;
-                }
                stats.documents |= documents;
                stats.number_of_embeddings += documents.len();
            }
@ -566,9 +563,6 @@ impl ArroyWrapper {
            for reader in self.readers(rtxn, self.angular_db()) {
                let reader = reader?;
                let documents = reader.item_ids();
-                if documents.is_empty() {
-                    break;
-                }
                stats.documents |= documents;
                stats.number_of_embeddings += documents.len();
            }