WIP: Introducing papaya

WIP: Ooops, I did some unsafe
WIP: I need multiple rtxns and will probably use a bit of unsafe
2025-11-24 05:26:57 +00:00 · 2025-08-19 12:10:03 +02:00 · 2025-08-19 11:49:30 +02:00 · 2025-08-14 17:34:52 +02:00 · 2025-08-14 15:16:26 +02:00 · 2025-08-13 16:39:02 +00:00
36 changed files with 2016 additions and 171 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -736,6 +736,12 @@ dependencies = [
 "syn 2.0.101",
 ]

+[[package]]
+name = "boxcar"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36f64beae40a84da1b4b26ff2761a5b895c12adc41dc25aaee1c4f2bbfe97a6e"
+
 [[package]]
 name = "brotli"
 version = "8.0.1"
@@ -2031,6 +2037,7 @@ name = "filter-parser"
 version = "1.17.1"
 dependencies = [
 "insta",
+ "levenshtein_automata",
 "nom",
 "nom_locate",
 "unescaper",
@@ -3927,6 +3934,7 @@ dependencies = [
 "big_s",
 "bimap",
 "bincode",
+ "boxcar",
 "bstr",
 "bumpalo",
 "bumparaw-collections",
@@ -3969,6 +3977,7 @@ dependencies = [
 "obkv",
 "once_cell",
 "ordered-float 5.0.0",
+ "papaya",
 "rand 0.8.5",
 "rayon",
 "rhai",
@@ -4410,6 +4419,16 @@ dependencies = [
 "winapi",
 ]

+[[package]]
+name = "papaya"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f92dd0b07c53a0a0c764db2ace8c541dc47320dad97c2200c2a637ab9dd2328f"
+dependencies = [
+ "equivalent",
+ "seize",
+]
+
 [[package]]
 name = "parking_lot"
 version = "0.12.4"
@@ -5461,6 +5480,16 @@ dependencies = [
 "time",
 ]

+[[package]]
+name = "seize"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e4b8d813387d566f627f3ea1b914c068aac94c40ae27ec43f5f33bde65abefe7"
+dependencies = [
+ "libc",
+ "windows-sys 0.52.0",
+]
+
 [[package]]
 name = "semver"
 version = "1.0.26"
--- a/crates/filter-parser/Cargo.toml
+++ b/crates/filter-parser/Cargo.toml
@@ -15,6 +15,7 @@ license.workspace = true
 nom = "7.1.3"
 nom_locate = "4.2.0"
 unescaper = "0.1.6"
+levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }

 [dev-dependencies]
 # fixed version due to format breakages in v1.40
--- a/crates/filter-parser/src/condition.rs
+++ b/crates/filter-parser/src/condition.rs
@@ -7,11 +7,22 @@

 use nom::branch::alt;
 use nom::bytes::complete::tag;
+use nom::character::complete::char;
+use nom::character::complete::multispace0;
 use nom::character::complete::multispace1;
 use nom::combinator::cut;
+use nom::combinator::map;
+use nom::combinator::value;
+use nom::sequence::preceded;
 use nom::sequence::{terminated, tuple};
 use Condition::*;

+use crate::error::IResultExt;
+use crate::value::parse_vector_value;
+use crate::value::parse_vector_value_cut;
+use crate::Error;
+use crate::ErrorKind;
+use crate::VectorFilter;
 use crate::{parse_value, FilterCondition, IResult, Span, Token};

 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -113,6 +124,83 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
    Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
 }

+fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
+    let (input, _) = multispace0(input)?;
+    let (input, fid) = tag("_vectors")(input)?;
+
+    if let Ok((input, _)) = multispace1::<_, crate::Error>(input) {
+        return Ok((input, (Token::from(fid), None, VectorFilter::None)));
+    }
+
+    let (input, _) = char('.')(input)?;
+
+    // From this point, we are certain this is a vector filter, so our errors must be final.
+    // We could use nom's `cut` but it's better to be explicit about the errors
+
+    if let Ok((_, space)) = tag::<_, _, ()>(" ")(input) {
+        return Err(crate::Error::failure_from_kind(space, ErrorKind::VectorFilterMissingEmbedder));
+    }
+
+    let (input, embedder_name) =
+        parse_vector_value_cut(input, ErrorKind::VectorFilterInvalidEmbedder)?;
+
+    let (input, filter) = alt((
+        map(
+            preceded(tag(".fragments"), |input| {
+                let (input, _) = tag(".")(input).map_cut(ErrorKind::VectorFilterMissingFragment)?;
+                parse_vector_value_cut(input, ErrorKind::VectorFilterInvalidFragment)
+            }),
+            VectorFilter::Fragment,
+        ),
+        value(VectorFilter::UserProvided, tag(".userProvided")),
+        value(VectorFilter::DocumentTemplate, tag(".documentTemplate")),
+        value(VectorFilter::Regenerate, tag(".regenerate")),
+        value(VectorFilter::None, nom::combinator::success("")),
+    ))(input)?;
+
+    if let Ok((input, point)) = tag::<_, _, ()>(".")(input) {
+        let opt_value = parse_vector_value(input).ok().map(|(_, v)| v);
+        let value =
+            opt_value.as_ref().map(|v| v.value().to_owned()).unwrap_or_else(|| point.to_string());
+        let context = opt_value.map(|v| v.original_span()).unwrap_or(point);
+        let previous_kind = match filter {
+            VectorFilter::Fragment(_) => Some("fragments"),
+            VectorFilter::DocumentTemplate => Some("documentTemplate"),
+            VectorFilter::UserProvided => Some("userProvided"),
+            VectorFilter::Regenerate => Some("regenerate"),
+            VectorFilter::None => None,
+        };
+        return Err(Error::failure_from_kind(
+            context,
+            ErrorKind::VectorFilterUnknownSuffix(previous_kind, value),
+        ));
+    }
+
+    let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?;
+
+    Ok((input, (Token::from(fid), Some(embedder_name), filter)))
+}
+
+/// vectors_exists          = vectors ("EXISTS" | ("NOT" WS+ "EXISTS"))
+pub fn parse_vectors_exists(input: Span) -> IResult<FilterCondition> {
+    let (input, (fid, embedder, filter)) = parse_vectors(input)?;
+
+    // Try parsing "EXISTS" first
+    if let Ok((input, _)) = tag::<_, _, ()>("EXISTS")(input) {
+        return Ok((input, FilterCondition::VectorExists { fid, embedder, filter }));
+    }
+
+    // Try parsing "NOT EXISTS"
+    if let Ok((input, _)) = tuple::<_, _, (), _>((tag("NOT"), multispace1, tag("EXISTS")))(input) {
+        return Ok((
+            input,
+            FilterCondition::Not(Box::new(FilterCondition::VectorExists { fid, embedder, filter })),
+        ));
+    }
+
+    Err(crate::Error::failure_from_kind(input, ErrorKind::VectorFilterOperation))
+}
+
 /// contains        = value "CONTAINS" value
 pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
    let (input, (fid, contains, value)) =
--- a/crates/filter-parser/src/error.rs
+++ b/crates/filter-parser/src/error.rs
@@ -42,6 +42,23 @@ pub fn cut_with_err<'a, O>(
    }
 }

+pub trait IResultExt<'a> {
+    fn map_cut(self, kind: ErrorKind<'a>) -> Self;
+}
+
+impl<'a, T> IResultExt<'a> for IResult<'a, T> {
+    fn map_cut(self, kind: ErrorKind<'a>) -> Self {
+        self.map_err(move |e: nom::Err<Error<'a>>| {
+            let input = match e {
+                nom::Err::Incomplete(_) => return e,
+                nom::Err::Error(e) => *e.context(),
+                nom::Err::Failure(e) => *e.context(),
+            };
+            Error::failure_from_kind(input, kind)
+        })
+    }
+}
+
 #[derive(Debug)]
 pub struct Error<'a> {
    context: Span<'a>,
@@ -61,6 +78,14 @@ pub enum ErrorKind<'a> {
    GeoBoundingBox,
    MisusedGeoRadius,
    MisusedGeoBoundingBox,
+    VectorFilterLeftover,
+    VectorFilterInvalidQuotes,
+    VectorFilterMissingEmbedder,
+    VectorFilterInvalidEmbedder,
+    VectorFilterMissingFragment,
+    VectorFilterInvalidFragment,
+    VectorFilterUnknownSuffix(Option<&'static str>, String),
+    VectorFilterOperation,
    InvalidPrimary,
    InvalidEscapedNumber,
    ExpectedEof,
@@ -91,6 +116,10 @@ impl<'a> Error<'a> {
        Self { context, kind }
    }

+    pub fn failure_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> nom::Err<Self> {
+        nom::Err::Failure(Self::new_from_kind(context, kind))
+    }
+
    pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
        Self::new_from_kind(context, ErrorKind::External(error.to_string()))
    }
@@ -128,6 +157,20 @@ impl Display for Error<'_> {
        // first line being the diagnostic and the second line being the incriminated filter.
        let escaped_input = input.escape_debug();

+        fn key_suggestion<'a>(key: &str, keys: &[&'a str]) -> Option<&'a str> {
+            let typos =
+                levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true).build_dfa(key);
+            for key in keys.iter() {
+                match typos.eval(key) {
+                    levenshtein_automata::Distance::Exact(_) => {
+                        return Some(key);
+                    }
+                    levenshtein_automata::Distance::AtLeast(_) => continue,
+                }
+            }
+            None
+        }
+
        match &self.kind {
            ErrorKind::ExpectedValue(_) if input.trim().is_empty() => {
                writeln!(f, "Was expecting a value but instead got nothing.")?
@@ -169,6 +212,44 @@ impl Display for Error<'_> {
            ErrorKind::MisusedGeoBoundingBox => {
                writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")?
            }
+            ErrorKind::VectorFilterLeftover => {
+                writeln!(f, "The vector filter has leftover tokens.")?
+            }
+            ErrorKind::VectorFilterUnknownSuffix(_, value) if value.as_str() == "." => {
+                writeln!(f, "Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.")?;
+            }
+            ErrorKind::VectorFilterUnknownSuffix(None, value) if ["fragments", "userProvided", "documentTemplate", "regenerate"].contains(&value.as_str()) => {
+                // This will happen with "_vectors.rest.\"userProvided\"" for instance
+                writeln!(f, "Was expecting this part to be unquoted.")?
+            }
+            ErrorKind::VectorFilterUnknownSuffix(None, value) => {
+                if let Some(suggestion) = key_suggestion(value, &["fragments", "userProvided", "documentTemplate", "regenerate"]) {
+                    writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`. Did you mean `{suggestion}`?")?;
+                } else {
+                    writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`.")?;
+                }
+            }
+            ErrorKind::VectorFilterUnknownSuffix(Some(previous_filter_kind), value) => {
+                writeln!(f, "Vector filter can only accept one of `fragments`, `userProvided`, `documentTemplate` or `regenerate`, but found both `{previous_filter_kind}` and `{value}`.")?
+            },
+            ErrorKind::VectorFilterInvalidFragment => {
+                writeln!(f, "The vector filter's fragment name is invalid.")?
+            }
+            ErrorKind::VectorFilterMissingFragment => {
+                writeln!(f, "The vector filter is missing a fragment name.")?
+            }
+            ErrorKind::VectorFilterMissingEmbedder => {
+                writeln!(f, "Was expecting embedder name but found nothing.")?
+            }
+            ErrorKind::VectorFilterInvalidEmbedder => {
+                writeln!(f, "The vector filter's embedder name is invalid.")?
+            }
+            ErrorKind::VectorFilterOperation => {
+                writeln!(f, "Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.")?
+            }
+            ErrorKind::VectorFilterInvalidQuotes => {
+                writeln!(f, "The quotes in one of the values are inconsistent.")?
+            }
            ErrorKind::ReservedKeyword(word) => {
                writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
            }
--- a/crates/filter-parser/src/lib.rs
+++ b/crates/filter-parser/src/lib.rs
@@ -65,6 +65,9 @@ use nom_locate::LocatedSpan;
 pub(crate) use value::parse_value;
 use value::word_exact;

+use crate::condition::parse_vectors_exists;
+use crate::error::IResultExt;
+
 pub type Span<'a> = LocatedSpan<&'a str, &'a str>;

 type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
@@ -136,6 +139,15 @@ impl<'a> From<&'a str> for Token<'a> {
    }
 }

+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum VectorFilter<'a> {
+    Fragment(Token<'a>),
+    DocumentTemplate,
+    UserProvided,
+    Regenerate,
+    None,
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum FilterCondition<'a> {
    Not(Box<Self>),
@@ -143,6 +155,7 @@ pub enum FilterCondition<'a> {
    In { fid: Token<'a>, els: Vec<Token<'a>> },
    Or(Vec<Self>),
    And(Vec<Self>),
+    VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
    GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
    GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
 }
@@ -173,9 +186,24 @@ impl<'a> FilterCondition<'a> {
            FilterCondition::Or(seq) | FilterCondition::And(seq) => {
                seq.iter().find_map(|filter| filter.use_contains_operator())
            }
+            FilterCondition::VectorExists { .. }
+            | FilterCondition::GeoLowerThan { .. }
+            | FilterCondition::GeoBoundingBox { .. }
+            | FilterCondition::In { .. } => None,
+        }
+    }
+
+    pub fn use_vector_filter(&self) -> Option<&Token> {
+        match self {
+            FilterCondition::Condition { .. } => None,
+            FilterCondition::Not(this) => this.use_vector_filter(),
+            FilterCondition::Or(seq) | FilterCondition::And(seq) => {
+                seq.iter().find_map(|filter| filter.use_vector_filter())
+            }
            FilterCondition::GeoLowerThan { .. }
            | FilterCondition::GeoBoundingBox { .. }
            | FilterCondition::In { .. } => None,
+            FilterCondition::VectorExists { fid, .. } => Some(fid),
        }
    }

@@ -263,10 +291,7 @@ fn parse_in_body(input: Span) -> IResult<Vec<Token>> {
    let (input, _) = ws(word_exact("IN"))(input)?;

    // everything after `IN` can be a failure
-    let (input, _) =
-        cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))(
-            input,
-        )?;
+    let (input, _) = tag("[")(input).map_cut(ErrorKind::InOpeningBracket)?;

    let (input, content) = cut(parse_value_list)(input)?;

@@ -412,7 +437,7 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
    let (input, args) = parsed?;

    if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
-        return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
+        return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
    }

    let res = FilterCondition::GeoBoundingBox {
@@ -433,7 +458,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
    // if we succeeded we still return a `Failure` because geoPoints are not allowed
-    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
+    Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))
 }

 /// geoPoint      = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
@@ -447,7 +472,7 @@ fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
    // if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
-    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
+    Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoDistance")))
 }

 /// geo      = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
@@ -461,7 +486,7 @@ fn parse_geo(input: Span) -> IResult<FilterCondition> {
    ))(input)
    .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
    // if we succeeded we still return a `Failure` because `_geo` filter is not allowed
-    Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
+    Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geo")))
 }

 fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
@@ -500,8 +525,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
        parse_is_not_null,
        parse_is_empty,
        parse_is_not_empty,
-        parse_exists,
-        parse_not_exists,
+        alt((parse_vectors_exists, parse_exists, parse_not_exists)),
        parse_to,
        parse_contains,
        parse_not_contains,
@@ -557,6 +581,22 @@ impl std::fmt::Display for FilterCondition<'_> {
                }
                write!(f, "]")
            }
+            FilterCondition::VectorExists { fid: _, embedder, filter: inner } => {
+                write!(f, "_vectors")?;
+                if let Some(embedder) = embedder {
+                    write!(f, ".{:?}", embedder.value())?;
+                }
+                match inner {
+                    VectorFilter::Fragment(fragment) => {
+                        write!(f, ".fragments.{:?}", fragment.value())?
+                    }
+                    VectorFilter::DocumentTemplate => write!(f, ".documentTemplate")?,
+                    VectorFilter::UserProvided => write!(f, ".userProvided")?,
+                    VectorFilter::Regenerate => write!(f, ".regenerate")?,
+                    VectorFilter::None => (),
+                }
+                write!(f, " EXISTS")
+            }
            FilterCondition::GeoLowerThan { point, radius } => {
                write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
            }
@@ -630,6 +670,9 @@ pub mod tests {
        insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
        // but it also works with other sequences
        insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
+
+        insta::assert_snapshot!(p(r#"_vectors." valid.name  ".fragments."also.. valid! " EXISTS"#), @r#"_vectors." valid.name  ".fragments."also.. valid! " EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.\"\n\t\r\\\"\" EXISTS"), @r#"_vectors."\n\t\r\"" EXISTS"#);
    }

    #[test]
@@ -692,6 +735,18 @@ pub mod tests {
        insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
        insta::assert_snapshot!(p("subscribers  IS   NOT   EMPTY"), @"NOT ({subscribers} IS EMPTY)");

+        // Test _vectors EXISTS + _vectors NOT EXITS
+        insta::assert_snapshot!(p("_vectors EXISTS"), @"_vectors EXISTS");
+        insta::assert_snapshot!(p("_vectors.embedderName EXISTS"), @r#"_vectors."embedderName" EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.documentTemplate EXISTS"), @r#"_vectors."embedderName".documentTemplate EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.regenerate EXISTS"), @r#"_vectors."embedderName".regenerate EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.regenerate EXISTS"), @r#"_vectors."embedderName".regenerate EXISTS"#);
+        insta::assert_snapshot!(p("_vectors.embedderName.fragments.fragmentName EXISTS"), @r#"_vectors."embedderName".fragments."fragmentName" EXISTS"#);
+        insta::assert_snapshot!(p("  _vectors.embedderName.fragments.fragmentName   EXISTS"), @r#"_vectors."embedderName".fragments."fragmentName" EXISTS"#);
+        insta::assert_snapshot!(p("NOT _vectors EXISTS"), @"NOT (_vectors EXISTS)");
+        insta::assert_snapshot!(p(" NOT  _vectors   EXISTS"), @"NOT (_vectors EXISTS)");
+        insta::assert_snapshot!(p("  _vectors  NOT  EXISTS"), @"NOT (_vectors EXISTS)");
+
        // Test EXISTS + NOT EXITS
        insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
        insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
@@ -946,6 +1001,71 @@ pub mod tests {
        "###
        );

+        insta::assert_snapshot!(p(r#"_vectors _vectors EXISTS"#), @r"
+        Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
+        10:25 _vectors _vectors EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors. embedderName EXISTS"#), @r"
+        Was expecting embedder name but found nothing.
+        10:11 _vectors. embedderName EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors .embedderName EXISTS"#), @r"
+        Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
+        10:30 _vectors .embedderName EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName. EXISTS"#), @r"
+        Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
+        22:23 _vectors.embedderName. EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors."embedderName EXISTS"#), @r#"
+        The quotes in one of the values are inconsistent.
+        10:30 _vectors."embedderName EXISTS
+        "#);
+        insta::assert_snapshot!(p(r#"_vectors."embedderNam"e EXISTS"#), @r#"
+        The vector filter has leftover tokens.
+        23:31 _vectors."embedderNam"e EXISTS
+        "#);
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.documentTemplate. EXISTS"#), @r"
+        Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
+        39:40 _vectors.embedderName.documentTemplate. EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments EXISTS"#), @r"
+        The vector filter is missing a fragment name.
+        32:39 _vectors.embedderName.fragments EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments. EXISTS"#), @r"
+        The vector filter's fragment name is invalid.
+        33:40 _vectors.embedderName.fragments. EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments.test test EXISTS"#), @r"
+        Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
+        38:49 _vectors.embedderName.fragments.test test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments. test EXISTS"#), @r"
+        The vector filter's fragment name is invalid.
+        33:45 _vectors.embedderName.fragments. test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName .fragments. test EXISTS"#), @r"
+        Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
+        23:46 _vectors.embedderName .fragments. test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName .fragments.test EXISTS"#), @r"
+        Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
+        23:45 _vectors.embedderName .fragments.test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.fargments.test EXISTS"#), @r"
+        Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `fargments`. Did you mean `fragments`?
+        23:32 _vectors.embedderName.fargments.test EXISTS
+        ");
+        insta::assert_snapshot!(p(r#"_vectors.embedderName."userProvided" EXISTS"#), @r#"
+        Was expecting this part to be unquoted.
+        24:36 _vectors.embedderName."userProvided" EXISTS
+        "#);
+        insta::assert_snapshot!(p(r#"_vectors.embedderName.userProvided.fragments.test EXISTS"#), @r"
+        Vector filter can only accept one of `fragments`, `userProvided`, `documentTemplate` or `regenerate`, but found both `userProvided` and `fragments`.
+        36:45 _vectors.embedderName.userProvided.fragments.test EXISTS
+        ");
+
        insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
        Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
        5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
--- a/crates/filter-parser/src/value.rs
+++ b/crates/filter-parser/src/value.rs
@@ -80,6 +80,51 @@ pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a,
    }
 }

+/// vector_value          = ( non_dot_word | singleQuoted | doubleQuoted)
+pub fn parse_vector_value(input: Span) -> IResult<Token> {
+    pub fn non_dot_word(input: Span) -> IResult<Token> {
+        let (input, word) = take_while1(|c| is_value_component(c) && c != '.')(input)?;
+        Ok((input, word.into()))
+    }
+
+    let (input, value) = alt((
+        delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
+        delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
+        non_dot_word,
+    ))(input)?;
+
+    match unescaper::unescape(value.value()) {
+        Ok(content) => {
+            if content.len() != value.value().len() {
+                Ok((input, Token::new(value.original_span(), Some(content))))
+            } else {
+                Ok((input, value))
+            }
+        }
+        Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
+        Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::InvalidEscapedNumber,
+        ))),
+        Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
+            value.original_span(),
+            ErrorKind::MalformedValue,
+        ))),
+    }
+}
+
+pub fn parse_vector_value_cut<'a>(input: Span<'a>, kind: ErrorKind<'a>) -> IResult<'a, Token<'a>> {
+    parse_vector_value(input).map_err(|e| match e {
+        nom::Err::Failure(e) => match e.kind() {
+            ErrorKind::Char(c) if *c == '"' || *c == '\'' => {
+                crate::Error::failure_from_kind(input, ErrorKind::VectorFilterInvalidQuotes)
+            }
+            _ => crate::Error::failure_from_kind(input, kind),
+        },
+        _ => crate::Error::failure_from_kind(input, kind),
+    })
+}
+
 /// value          = WS* ( word | singleQuoted | doubleQuoted) WS+
 pub fn parse_value(input: Span) -> IResult<Token> {
    // to get better diagnostic message we are going to strip the left whitespaces from the input right now
@@ -99,31 +144,21 @@ pub fn parse_value(input: Span) -> IResult<Token> {
    }

    match parse_geo_radius(input) {
-        Ok(_) => {
-            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
-        }
+        Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius)),
        // if we encountered a failure it means the user badly wrote a _geoRadius filter.
        // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
        Err(e) if e.is_failure() => {
-            return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
+            return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius))
        }
        _ => (),
    }

    match parse_geo_bounding_box(input) {
-        Ok(_) => {
-            return Err(nom::Err::Failure(Error::new_from_kind(
-                input,
-                ErrorKind::MisusedGeoBoundingBox,
-            )))
-        }
+        Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)),
        // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
        // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
        Err(e) if e.is_failure() => {
-            return Err(nom::Err::Failure(Error::new_from_kind(
-                input,
-                ErrorKind::MisusedGeoBoundingBox,
-            )))
+            return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox))
        }
        _ => (),
    }
--- a/crates/index-scheduler/src/scheduler/test_document_addition.rs
+++ b/crates/index-scheduler/src/scheduler/test_document_addition.rs
@@ -736,7 +736,7 @@ fn test_document_addition_mixed_rights_with_index() {
 #[test]
 fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
    // We're going to autobatch multiple document addition.
-    // - The index does not exists
+    // - The index does not exist
    // - The first document addition don't have the right to create an index
    // - The second do. They should not batch together.
    // - The second should batch with everything else as it's going to create an index.
--- a/crates/meilisearch/src/routes/indexes/documents.rs
+++ b/crates/meilisearch/src/routes/indexes/documents.rs
@@ -139,6 +139,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
    per_document_id: bool,
    // if a filter was used
    per_filter: bool,
+    with_vector_filter: bool,
+
    // if documents were sorted
    sort: bool,

@@ -166,6 +168,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
        Box::new(Self {
            per_document_id: self.per_document_id | new.per_document_id,
            per_filter: self.per_filter | new.per_filter,
+            with_vector_filter: self.with_vector_filter | new.with_vector_filter,
            sort: self.sort | new.sort,
            retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
            max_limit: self.max_limit.max(new.max_limit),
@@ -250,6 +253,7 @@ pub async fn get_document(
            retrieve_vectors: param_retrieve_vectors.0,
            per_document_id: true,
            per_filter: false,
+            with_vector_filter: false,
            sort: false,
            max_limit: 0,
            max_offset: 0,
@@ -475,6 +479,10 @@ pub async fn documents_by_query_post(
    analytics.publish(
        DocumentsFetchAggregator::<DocumentsPOST> {
            per_filter: body.filter.is_some(),
+            with_vector_filter: body
+                .filter
+                .as_ref()
+                .is_some_and(|f| f.to_string().contains("_vectors")),
            sort: body.sort.is_some(),
            retrieve_vectors: body.retrieve_vectors,
            max_limit: body.limit,
@@ -576,6 +584,10 @@ pub async fn get_documents(
    analytics.publish(
        DocumentsFetchAggregator::<DocumentsGET> {
            per_filter: query.filter.is_some(),
+            with_vector_filter: query
+                .filter
+                .as_ref()
+                .is_some_and(|f| f.to_string().contains("_vectors")),
            sort: query.sort.is_some(),
            retrieve_vectors: query.retrieve_vectors,
            max_limit: query.limit,
@@ -1455,8 +1467,6 @@ fn some_documents<'a, 't: 'a>(
                    document.remove("_vectors");
                }
                RetrieveVectors::Retrieve => {
-                    // Clippy is simply wrong
-                    #[allow(clippy::manual_unwrap_or_default)]
                    let mut vectors = match document.remove("_vectors") {
                        Some(Value::Object(map)) => map,
                        _ => Default::default(),
--- a/crates/meilisearch/src/routes/indexes/search_analytics.rs
+++ b/crates/meilisearch/src/routes/indexes/search_analytics.rs
@@ -40,6 +40,7 @@ pub struct SearchAggregator<Method: AggregateMethod> {
    // filter
    filter_with_geo_radius: bool,
    filter_with_geo_bounding_box: bool,
+    filter_on_vectors: bool,
    // every time a request has a filter, this field must be incremented by the number of terms it contains
    filter_sum_of_criteria_terms: usize,
    // every time a request has a filter, this field must be incremented by one
@@ -163,6 +164,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
            let stringified_filters = filter.to_string();
            ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
            ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
+            ret.filter_on_vectors = stringified_filters.contains("_vectors");
            ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
        }

@@ -224,6 +226,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
        let SearchResult {
            hits: _,
            query: _,
+            query_vector: _,
            processing_time_ms,
            hits_info: _,
            semantic_hit_count: _,
@@ -260,6 +263,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
            distinct,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
+            filter_on_vectors,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
@@ -314,6 +318,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
        // filter
        self.filter_with_geo_radius |= filter_with_geo_radius;
        self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
+        self.filter_on_vectors |= filter_on_vectors;
        self.filter_sum_of_criteria_terms =
            self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
        self.filter_total_number_of_criteria =
@@ -388,6 +393,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
            distinct,
            filter_with_geo_radius,
            filter_with_geo_bounding_box,
+            filter_on_vectors,
            filter_sum_of_criteria_terms,
            filter_total_number_of_criteria,
            used_syntax,
@@ -445,6 +451,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
            "filter": {
               "with_geoRadius": filter_with_geo_radius,
               "with_geoBoundingBox": filter_with_geo_bounding_box,
+               "on_vectors": filter_on_vectors,
               "avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
               "most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
            },
--- a/crates/meilisearch/src/routes/tasks.rs
+++ b/crates/meilisearch/src/routes/tasks.rs
@@ -336,7 +336,7 @@ impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
@@ -430,7 +430,7 @@ async fn cancel_tasks(
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
@@ -611,7 +611,7 @@ async fn get_tasks(
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
@@ -665,7 +665,7 @@ async fn get_task(
                "link": "https://docs.meilisearch.com/errors#missing_authorization_header"
            }
        )),
-        (status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
+        (status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
            {
                "message": "Task :taskUid not found.",
                "code": "task_not_found",
--- a/crates/meilisearch/src/search/federated/perform.rs
+++ b/crates/meilisearch/src/search/federated/perform.rs
@@ -13,6 +13,7 @@ use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{Network, Remote};
 use meilisearch_types::milli::order_by_map::OrderByMap;
 use meilisearch_types::milli::score_details::{ScoreDetails, WeightedScoreValue};
+use meilisearch_types::milli::vector::Embedding;
 use meilisearch_types::milli::{self, DocumentId, OrderBy, TimeBudget, DEFAULT_VALUES_PER_FACET};
 use roaring::RoaringBitmap;
 use tokio::task::JoinHandle;
@@ -46,6 +47,7 @@ pub async fn perform_federated_search(
    let deadline = before_search + std::time::Duration::from_secs(9);

    let required_hit_count = federation.limit + federation.offset;
+    let retrieve_vectors = queries.iter().any(|q| q.retrieve_vectors);

    let network = index_scheduler.network();

@@ -91,6 +93,7 @@ pub async fn perform_federated_search(
        federation,
        mut semantic_hit_count,
        mut results_by_index,
+        mut query_vectors,
        previous_query_data: _,
        facet_order,
    } = search_by_index;
@@ -122,7 +125,26 @@ pub async fn perform_federated_search(
        .map(|hit| hit.hit())
        .collect();

-    // 3.3. merge facets
+    // 3.3. merge query vectors
+    let query_vectors = if retrieve_vectors {
+        for remote_results in remote_results.iter_mut() {
+            if let Some(remote_vectors) = remote_results.query_vectors.take() {
+                for (key, value) in remote_vectors.into_iter() {
+                    debug_assert!(
+                        !query_vectors.contains_key(&key),
+                        "Query vector for query {key} already exists"
+                    );
+                    query_vectors.insert(key, value);
+                }
+            }
+        }
+
+        Some(query_vectors)
+    } else {
+        None
+    };
+
+    // 3.4. merge facets
    let (facet_distribution, facet_stats, facets_by_index) =
        facet_order.merge(federation.merge_facets, remote_results, facets);

@@ -140,6 +162,7 @@ pub async fn perform_federated_search(
            offset: federation.offset,
            estimated_total_hits,
        },
+        query_vectors,
        semantic_hit_count,
        degraded,
        used_negative_operator,
@@ -408,6 +431,7 @@ fn merge_metadata(
        hits: _,
        processing_time_ms,
        hits_info,
+        query_vectors: _,
        semantic_hit_count: _,
        facet_distribution: _,
        facet_stats: _,
@@ -657,6 +681,7 @@ struct SearchByIndex {
    // Then when merging, we'll update its value if there is any semantic hit
    semantic_hit_count: Option<u32>,
    results_by_index: Vec<SearchResultByIndex>,
+    query_vectors: BTreeMap<usize, Embedding>,
    previous_query_data: Option<(RankingRules, usize, String)>,
    // remember the order and name of first index for each facet when merging with index settings
    // to detect if the order is inconsistent for a facet.
@@ -674,6 +699,7 @@ impl SearchByIndex {
            federation,
            semantic_hit_count: None,
            results_by_index: Vec::with_capacity(index_count),
+            query_vectors: BTreeMap::new(),
            previous_query_data: None,
        }
    }
@@ -837,8 +863,19 @@ impl SearchByIndex {
                    document_scores,
                    degraded: query_degraded,
                    used_negative_operator: query_used_negative_operator,
+                    query_vector,
                } = result;

+                if query.retrieve_vectors {
+                    if let Some(query_vector) = query_vector {
+                        debug_assert!(
+                            !self.query_vectors.contains_key(&query_index),
+                            "Query vector for query {query_index} already exists"
+                        );
+                        self.query_vectors.insert(query_index, query_vector);
+                    }
+                }
+
                candidates |= query_candidates;
                degraded |= query_degraded;
                used_negative_operator |= query_used_negative_operator;
--- a/crates/meilisearch/src/search/federated/types.rs
+++ b/crates/meilisearch/src/search/federated/types.rs
@@ -18,6 +18,7 @@ use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;

 use super::super::{ComputedFacets, FacetStats, HitsInfo, SearchHit, SearchQueryWithIndex};
+use crate::milli::vector::Embedding;

 pub const DEFAULT_FEDERATED_WEIGHT: f64 = 1.0;

@@ -117,6 +118,9 @@ pub struct FederatedSearchResult {
    #[serde(flatten)]
    pub hits_info: HitsInfo,

+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub query_vectors: Option<BTreeMap<usize, Embedding>>,
+
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub semantic_hit_count: Option<u32>,

@@ -144,6 +148,7 @@ impl fmt::Debug for FederatedSearchResult {
            hits,
            processing_time_ms,
            hits_info,
+            query_vectors,
            semantic_hit_count,
            degraded,
            used_negative_operator,
@@ -158,6 +163,10 @@ impl fmt::Debug for FederatedSearchResult {
        debug.field("processing_time_ms", &processing_time_ms);
        debug.field("hits", &format!("[{} hits returned]", hits.len()));
        debug.field("hits_info", &hits_info);
+        if let Some(query_vectors) = query_vectors {
+            let known = query_vectors.len();
+            debug.field("query_vectors", &format!("[{known} known vectors]"));
+        }
        if *used_negative_operator {
            debug.field("used_negative_operator", used_negative_operator);
        }
--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@@ -841,6 +841,8 @@ pub struct SearchHit {
 pub struct SearchResult {
    pub hits: Vec<SearchHit>,
    pub query: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub query_vector: Option<Vec<f32>>,
    pub processing_time_ms: u128,
    #[serde(flatten)]
    pub hits_info: HitsInfo,
@@ -865,6 +867,7 @@ impl fmt::Debug for SearchResult {
        let SearchResult {
            hits,
            query,
+            query_vector,
            processing_time_ms,
            hits_info,
            facet_distribution,
@@ -879,6 +882,9 @@ impl fmt::Debug for SearchResult {
        debug.field("processing_time_ms", &processing_time_ms);
        debug.field("hits", &format!("[{} hits returned]", hits.len()));
        debug.field("query", &query);
+        if query_vector.is_some() {
+            debug.field("query_vector", &"[...]");
+        }
        debug.field("hits_info", &hits_info);
        if *used_negative_operator {
            debug.field("used_negative_operator", used_negative_operator);
@@ -1050,6 +1056,7 @@ pub fn prepare_search<'t>(
        .map(|x| x as usize)
        .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

+    search.retrieve_vectors(query.retrieve_vectors);
    search.exhaustive_number_hits(is_finite_pagination);
    search.max_total_hits(Some(max_total_hits));
    search.scoring_strategy(
@@ -1132,6 +1139,7 @@ pub fn perform_search(
            document_scores,
            degraded,
            used_negative_operator,
+            query_vector,
        },
        semantic_hit_count,
    ) = search_from_kind(index_uid, search_kind, search)?;
@@ -1222,6 +1230,7 @@ pub fn perform_search(
        hits: documents,
        hits_info,
        query: q.unwrap_or_default(),
+        query_vector,
        processing_time_ms: before_search.elapsed().as_millis(),
        facet_distribution,
        facet_stats,
@@ -1734,6 +1743,7 @@ pub fn perform_similar(
        document_scores,
        degraded: _,
        used_negative_operator: _,
+        query_vector: _,
    } = similar.execute().map_err(|err| match err {
        milli::Error::UserError(milli::UserError::InvalidFilter(_)) => {
            ResponseError::from_msg(err.to_string(), Code::InvalidSimilarFilter)
@@ -2081,7 +2091,7 @@ pub(crate) fn parse_filter(
    })?;

    if let Some(ref filter) = filter {
-        // If the contains operator is used while the contains filter features is not enabled, errors out
+        // If the contains operator is used while the contains filter feature is not enabled, errors out
        if let Some((token, error)) =
            filter.use_contains_operator().zip(features.check_contains_filter().err())
        {
@@ -2092,6 +2102,18 @@ pub(crate) fn parse_filter(
        }
    }

+    if let Some(ref filter) = filter {
+        // If a vector filter is used while the multi modal feature is not enabled, errors out
+        if let Some((token, error)) =
+            filter.use_vector_filter().zip(features.check_multimodal("using a vector filter").err())
+        {
+            return Err(ResponseError::from_msg(
+                token.as_external_error(error).to_string(),
+                Code::FeatureNotEnabled,
+            ));
+        }
+    }
+
    Ok(filter)
 }

--- a/crates/meilisearch/tests/documents/errors.rs
+++ b/crates/meilisearch/tests/documents/errors.rs
@@ -557,7 +557,7 @@ async fn delete_document_by_filter() {
    "###);

    let index = shared_does_not_exists_index().await;
-    // index does not exists
+    // index does not exist
    let (response, _code) =
        index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
    snapshot!(response, @r###"
--- a/crates/meilisearch/tests/search/errors.rs
+++ b/crates/meilisearch/tests/search/errors.rs
@@ -304,7 +304,7 @@ async fn search_bad_filter() {
    let server = Server::new_shared();
    let index = server.unique_index();
    // Also, to trigger the error message we need to effectively create the index or else it'll throw an
-    // index does not exists error.
+    // index does not exist error.
    let (response, _code) = index.create(None).await;
    server.wait_task(response.uid()).await.succeeded();

@@ -1263,7 +1263,7 @@ async fn search_with_contains_without_enabling_the_feature() {
    let server = Server::new_shared();
    let index = server.unique_index();
    // Also, to trigger the error message we need to effectively create the index or else it'll throw an
-    // index does not exists error.
+    // index does not exist error.
    let (task, _code) = index.create(None).await;
    server.wait_task(task.uid()).await.succeeded();

--- a/crates/meilisearch/tests/search/filters.rs
+++ b/crates/meilisearch/tests/search/filters.rs
@@ -4,8 +4,8 @@ use tempfile::TempDir;

 use super::test_settings_documents_indexing_swapping_and_search;
 use crate::common::{
-    default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
-    DOCUMENTS, NESTED_DOCUMENTS,
+    default_settings, shared_index_for_fragments, shared_index_with_documents,
+    shared_index_with_nested_documents, Server, DOCUMENTS, NESTED_DOCUMENTS,
 };
 use crate::json;

@@ -731,3 +731,432 @@ async fn test_filterable_attributes_priority() {
    )
    .await;
 }
+
+#[actix_rt::test]
+async fn vector_filter_all_embedders() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "echo"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 4
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_missing_fragment() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "The vector filter is missing a fragment name.\n24:31 _vectors.rest.fragments EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_nonexistent_embedder() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.other EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_all_embedders_user_provided() {
+    let index = shared_index_for_fragments().await;
+
+    // This one is counterintuitive, but it is the same as the previous one.
+    // It's because userProvided is interpreted as an embedder name
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.userProvided EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_specific_embedder() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "echo"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 4
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_user_provided() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.userProvided EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "echo"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 1
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_specific_fragment() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments.withBreed EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 2
+    }
+    "#);
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments.basic EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_non_existant_fragment() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments.withBred EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "Index `[uuid]`: The fragment `withBred` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`. Did you mean `withBreed`?\n25:33 _vectors.rest.fragments.withBred EXISTS",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_document_template_but_fragments_used() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.documentTemplate EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_document_template() {
+    let (_mock, setting) = crate::vector::create_mock().await;
+    let server = crate::vector::get_server_vector().await;
+    let index = server.index("doggo");
+
+    let (_response, code) = server.set_features(json!({"multimodal": true})).await;
+    snapshot!(code, @"200 OK");
+
+    let (response, code) = index
+        .update_settings(json!({
+            "embedders": {
+                "rest": setting,
+            },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let documents = json!([
+        {"id": 0, "name": "kefir"},
+        {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
+        {"id": 2, "name": "intel"},
+        {"id": 3, "name": "iko" }
+    ]);
+    let (value, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(value.uid()).await.succeeded();
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.documentTemplate EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "iko"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_feature_gate() {
+    let index = shared_index_with_documents().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "message": "using a vector filter requires enabling the `multimodal` experimental feature. See https://github.com/orgs/meilisearch/discussions/846\n1:9 _vectors EXISTS",
+      "code": "feature_not_enabled",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#feature_not_enabled"
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_negation() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.userProvided NOT EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_or_combination() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": "_vectors.rest.fragments.withBreed EXISTS OR _vectors.rest.userProvided EXISTS",
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "echo"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
+
+#[actix_rt::test]
+async fn vector_filter_regenerate() {
+    let index = shared_index_for_fragments().await;
+
+    let (value, _code) = index
+        .search_post(json!({
+            "filter": format!("_vectors.rest.regenerate EXISTS"),
+            "attributesToRetrieve": ["name"]
+        }))
+        .await;
+    snapshot!(value, @r#"
+    {
+      "hits": [
+        {
+          "name": "kefir"
+        },
+        {
+          "name": "intel"
+        },
+        {
+          "name": "dustin"
+        }
+      ],
+      "query": "",
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3
+    }
+    "#);
+}
--- a/crates/meilisearch/tests/search/hybrid.rs
+++ b/crates/meilisearch/tests/search/hybrid.rs
@@ -148,7 +148,70 @@ async fn simple_search() {
        )
        .await;
    snapshot!(code, @"200 OK");
-    snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}}}]"###);
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "title": "Captain Planet",
+          "desc": "He's not part of the Marvel Cinematic Universe",
+          "id": "2",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  1.0,
+                  2.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        },
+        {
+          "title": "Captain Marvel",
+          "desc": "a Shazam ersatz",
+          "id": "3",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  2.0,
+                  3.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        },
+        {
+          "title": "Shazam!",
+          "desc": "a Captain Marvel ersatz",
+          "id": "1",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  1.0,
+                  3.0
+                ]
+              ],
+              "regenerate": false
+            }
+          }
+        }
+      ],
+      "query": "Captain",
+      "queryVector": [
+        1.0,
+        1.0
+      ],
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3,
+      "semanticHitCount": 0
+    }
+    "#);
    snapshot!(response["semanticHitCount"], @"0");

    let (response, code) = index
@@ -157,7 +220,73 @@ async fn simple_search() {
        )
        .await;
    snapshot!(code, @"200 OK");
-    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "title": "Captain Marvel",
+          "desc": "a Shazam ersatz",
+          "id": "3",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  2.0,
+                  3.0
+                ]
+              ],
+              "regenerate": false
+            }
+          },
+          "_rankingScore": 0.990290343761444
+        },
+        {
+          "title": "Captain Planet",
+          "desc": "He's not part of the Marvel Cinematic Universe",
+          "id": "2",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  1.0,
+                  2.0
+                ]
+              ],
+              "regenerate": false
+            }
+          },
+          "_rankingScore": 0.9848484848484848
+        },
+        {
+          "title": "Shazam!",
+          "desc": "a Captain Marvel ersatz",
+          "id": "1",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  1.0,
+                  3.0
+                ]
+              ],
+              "regenerate": false
+            }
+          },
+          "_rankingScore": 0.9472135901451112
+        }
+      ],
+      "query": "Captain",
+      "queryVector": [
+        1.0,
+        1.0
+      ],
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3,
+      "semanticHitCount": 2
+    }
+    "#);
    snapshot!(response["semanticHitCount"], @"2");

    let (response, code) = index
@@ -166,7 +295,73 @@ async fn simple_search() {
        )
        .await;
    snapshot!(code, @"200 OK");
-    snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
+    snapshot!(response, @r#"
+    {
+      "hits": [
+        {
+          "title": "Captain Marvel",
+          "desc": "a Shazam ersatz",
+          "id": "3",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  2.0,
+                  3.0
+                ]
+              ],
+              "regenerate": false
+            }
+          },
+          "_rankingScore": 0.990290343761444
+        },
+        {
+          "title": "Captain Planet",
+          "desc": "He's not part of the Marvel Cinematic Universe",
+          "id": "2",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  1.0,
+                  2.0
+                ]
+              ],
+              "regenerate": false
+            }
+          },
+          "_rankingScore": 0.974341630935669
+        },
+        {
+          "title": "Shazam!",
+          "desc": "a Captain Marvel ersatz",
+          "id": "1",
+          "_vectors": {
+            "default": {
+              "embeddings": [
+                [
+                  1.0,
+                  3.0
+                ]
+              ],
+              "regenerate": false
+            }
+          },
+          "_rankingScore": 0.9472135901451112
+        }
+      ],
+      "query": "Captain",
+      "queryVector": [
+        1.0,
+        1.0
+      ],
+      "processingTimeMs": "[duration]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 3,
+      "semanticHitCount": 3
+    }
+    "#);
    snapshot!(response["semanticHitCount"], @"3");
 }

--- a/crates/meilisearch/tests/search/multi/mod.rs
+++ b/crates/meilisearch/tests/search/multi/mod.rs
@@ -3703,7 +3703,7 @@ async fn federation_vector_two_indexes() {
        ]}))
        .await;
    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###"
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r#"
    {
      "hits": [
        {
@@ -3911,9 +3911,20 @@ async fn federation_vector_two_indexes() {
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 8,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.5
+        ],
+        "1": [
+          0.8,
+          0.6
+        ]
+      },
      "semanticHitCount": 6
    }
-    "###);
+    "#);

    // hybrid search, distinct embedder
    let (response, code) = server
@@ -3923,7 +3934,7 @@ async fn federation_vector_two_indexes() {
        ]}))
        .await;
    snapshot!(code, @"200 OK");
-    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r###"
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[duration]", ".**._rankingScore" => "[score]" }), @r#"
    {
      "hits": [
        {
@@ -4139,9 +4150,20 @@ async fn federation_vector_two_indexes() {
      "limit": 20,
      "offset": 0,
      "estimatedTotalHits": 8,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.5
+        ],
+        "1": [
+          -1.0,
+          0.6
+        ]
+      },
      "semanticHitCount": 8
    }
-    "###);
+    "#);
 }

 #[actix_rt::test]
--- a/crates/meilisearch/tests/search/multi/proxy.rs
+++ b/crates/meilisearch/tests/search/multi/proxy.rs
@@ -2,8 +2,9 @@ use std::sync::Arc;

 use actix_http::StatusCode;
 use meili_snap::{json_string, snapshot};
-use wiremock::matchers::AnyMatcher;
-use wiremock::{Mock, MockServer, ResponseTemplate};
+use wiremock::matchers::method;
+use wiremock::matchers::{path, AnyMatcher};
+use wiremock::{Mock, MockServer, Request, ResponseTemplate};

 use crate::common::{Server, Value, SCORE_DOCUMENTS};
 use crate::json;
@@ -415,6 +416,503 @@ async fn remote_sharding() {
    "###);
 }

+#[actix_rt::test]
+async fn remote_sharding_retrieve_vectors() {
+    let ms0 = Server::new().await;
+    let ms1 = Server::new().await;
+    let ms2 = Server::new().await;
+    let index0 = ms0.index("test");
+    let index1 = ms1.index("test");
+    let index2 = ms2.index("test");
+
+    // enable feature
+
+    let (response, code) = ms0.set_features(json!({"network": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["network"]), @"true");
+    let (response, code) = ms1.set_features(json!({"network": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["network"]), @"true");
+    let (response, code) = ms2.set_features(json!({"network": true})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response["network"]), @"true");
+
+    // set self
+
+    let (response, code) = ms0.set_network(json!({"self": "ms0"})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response), @r###"
+    {
+      "self": "ms0",
+      "remotes": {}
+    }
+    "###);
+    let (response, code) = ms1.set_network(json!({"self": "ms1"})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response), @r###"
+    {
+      "self": "ms1",
+      "remotes": {}
+    }
+    "###);
+    let (response, code) = ms2.set_network(json!({"self": "ms2"})).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response), @r###"
+    {
+      "self": "ms2",
+      "remotes": {}
+    }
+    "###);
+
+    // setup embedders
+
+    let mock_server = MockServer::start().await;
+    Mock::given(method("POST"))
+        .and(path("/"))
+        .respond_with(move |req: &Request| {
+            println!("Received request: {:?}", req);
+            let text = req.body_json::<String>().unwrap().to_lowercase();
+            let patterns = [
+                ("batman", [1.0, 0.0, 0.0]),
+                ("dark", [0.0, 0.1, 0.0]),
+                ("knight", [0.1, 0.1, 0.0]),
+                ("returns", [0.0, 0.0, 0.2]),
+                ("part", [0.05, 0.1, 0.0]),
+                ("1", [0.3, 0.05, 0.0]),
+                ("2", [0.2, 0.05, 0.0]),
+            ];
+            let mut embedding = vec![0.; 3];
+            for (pattern, vector) in patterns {
+                if text.contains(pattern) {
+                    for (i, v) in vector.iter().enumerate() {
+                        embedding[i] += v;
+                    }
+                }
+            }
+            ResponseTemplate::new(200).set_body_json(json!({ "data": embedding }))
+        })
+        .mount(&mock_server)
+        .await;
+    let url = mock_server.uri();
+
+    for (server, index) in [(&ms0, &index0), (&ms1, &index1), (&ms2, &index2)] {
+        let (response, code) = index
+            .update_settings(json!({
+                "embedders": {
+                    "rest": {
+                        "source": "rest",
+                        "url": url,
+                        "dimensions": 3,
+                        "request": "{{text}}",
+                        "response": { "data": "{{embedding}}" },
+                        "documentTemplate": "{{doc.name}}",
+                    },
+                },
+            }))
+            .await;
+        snapshot!(code, @"202 Accepted");
+        server.wait_task(response.uid()).await.succeeded();
+    }
+
+    // wrap servers
+    let ms0 = Arc::new(ms0);
+    let ms1 = Arc::new(ms1);
+    let ms2 = Arc::new(ms2);
+
+    let rms0 = LocalMeili::new(ms0.clone()).await;
+    let rms1 = LocalMeili::new(ms1.clone()).await;
+    let rms2 = LocalMeili::new(ms2.clone()).await;
+
+    // set network
+    let network = json!({"remotes": {
+        "ms0": {
+            "url": rms0.url()
+        },
+        "ms1": {
+            "url": rms1.url()
+        },
+        "ms2": {
+            "url": rms2.url()
+        }
+    }});
+
+    let (_response, status_code) = ms0.set_network(network.clone()).await;
+    snapshot!(status_code, @"200 OK");
+    let (_response, status_code) = ms1.set_network(network.clone()).await;
+    snapshot!(status_code, @"200 OK");
+    let (_response, status_code) = ms2.set_network(network.clone()).await;
+    snapshot!(status_code, @"200 OK");
+
+    // multi vector search: one query per remote
+
+    let request = json!({
+        "federation": {},
+        "queries": [
+            {
+                "q": "batman",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms0"
+                }
+            },
+            {
+                "q": "dark knight",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+            {
+                "q": "returns",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms2"
+                }
+            },
+        ]
+    });
+
+    let (response, _status_code) = ms0.multi_search(request.clone()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r#"
+    {
+      "hits": [],
+      "processingTimeMs": "[time]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.0
+        ],
+        "1": [
+          0.1,
+          0.2,
+          0.0
+        ],
+        "2": [
+          0.0,
+          0.0,
+          0.2
+        ]
+      },
+      "semanticHitCount": 0,
+      "remoteErrors": {}
+    }
+    "#);
+
+    // multi vector search: two local queries, one remote
+
+    let request = json!({
+        "federation": {},
+        "queries": [
+            {
+                "q": "batman",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms0"
+                }
+            },
+            {
+                "q": "dark knight",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms0"
+                }
+            },
+            {
+                "q": "returns",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms2"
+                }
+            },
+        ]
+    });
+
+    let (response, _status_code) = ms0.multi_search(request.clone()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r#"
+    {
+      "hits": [],
+      "processingTimeMs": "[time]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.0
+        ],
+        "1": [
+          0.1,
+          0.2,
+          0.0
+        ],
+        "2": [
+          0.0,
+          0.0,
+          0.2
+        ]
+      },
+      "semanticHitCount": 0,
+      "remoteErrors": {}
+    }
+    "#);
+
+    // multi vector search: two queries on the same remote
+
+    let request = json!({
+        "federation": {},
+        "queries": [
+            {
+                "q": "batman",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms0"
+                }
+            },
+            {
+                "q": "dark knight",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+            {
+                "q": "returns",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+        ]
+    });
+
+    let (response, _status_code) = ms0.multi_search(request.clone()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r#"
+    {
+      "hits": [],
+      "processingTimeMs": "[time]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.0
+        ],
+        "1": [
+          0.1,
+          0.2,
+          0.0
+        ],
+        "2": [
+          0.0,
+          0.0,
+          0.2
+        ]
+      },
+      "semanticHitCount": 0,
+      "remoteErrors": {}
+    }
+    "#);
+
+    // multi search: two vector, one keyword
+
+    let request = json!({
+        "federation": {},
+        "queries": [
+            {
+                "q": "batman",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms0"
+                }
+            },
+            {
+                "q": "dark knight",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 0.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+            {
+                "q": "returns",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+        ]
+    });
+
+    let (response, _status_code) = ms0.multi_search(request.clone()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r#"
+    {
+      "hits": [],
+      "processingTimeMs": "[time]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.0
+        ],
+        "2": [
+          0.0,
+          0.0,
+          0.2
+        ]
+      },
+      "semanticHitCount": 0,
+      "remoteErrors": {}
+    }
+    "#);
+
+    // multi vector search: no local queries, all remote
+
+    let request = json!({
+        "federation": {},
+        "queries": [
+            {
+                "q": "batman",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+            {
+                "q": "dark knight",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+            {
+                "q": "returns",
+                "indexUid": "test",
+                "hybrid": {
+                    "semanticRatio": 1.0,
+                    "embedder": "rest"
+                },
+                "retrieveVectors": true,
+                "federationOptions": {
+                    "remote": "ms1"
+                }
+            },
+        ]
+    });
+
+    let (response, _status_code) = ms0.multi_search(request.clone()).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r#"
+    {
+      "hits": [],
+      "processingTimeMs": "[time]",
+      "limit": 20,
+      "offset": 0,
+      "estimatedTotalHits": 0,
+      "queryVectors": {
+        "0": [
+          1.0,
+          0.0,
+          0.0
+        ],
+        "1": [
+          0.1,
+          0.2,
+          0.0
+        ],
+        "2": [
+          0.0,
+          0.0,
+          0.2
+        ]
+      },
+      "remoteErrors": {}
+    }
+    "#);
+}
+
 #[actix_rt::test]
 async fn error_unregistered_remote() {
    let ms0 = Server::new().await;
--- a/crates/meilisearch/tests/vector/binary_quantized.rs
+++ b/crates/meilisearch/tests/vector/binary_quantized.rs
@@ -323,7 +323,7 @@ async fn binary_quantize_clear_documents() {
    // Make sure the arroy DB has been cleared
    let (documents, _code) =
        index.search_post(json!({ "hybrid": { "embedder": "manual" }, "vector": [1, 1, 1] })).await;
-    snapshot!(documents, @r###"
+    snapshot!(documents, @r#"
    {
      "hits": [],
      "query": "",
@@ -333,5 +333,5 @@ async fn binary_quantize_clear_documents() {
      "estimatedTotalHits": 0,
      "semanticHitCount": 0
    }
-    "###);
+    "#);
 }
--- a/crates/meilisearch/tests/vector/mod.rs
+++ b/crates/meilisearch/tests/vector/mod.rs
@@ -14,8 +14,9 @@ use meilisearch::option::MaxThreads;
 use crate::common::index::Index;
 use crate::common::{default_settings, GetAllDocumentsOptions, Server};
 use crate::json;
+pub use rest::create_mock;

-async fn get_server_vector() -> Server {
+pub async fn get_server_vector() -> Server {
    Server::new().await
 }

@@ -686,7 +687,7 @@ async fn clear_documents() {
    // Make sure the arroy DB has been cleared
    let (documents, _code) =
        index.search_post(json!({ "vector": [1, 1, 1], "hybrid": {"embedder": "manual"} })).await;
-    snapshot!(documents, @r###"
+    snapshot!(documents, @r#"
    {
      "hits": [],
      "query": "",
@@ -696,7 +697,7 @@ async fn clear_documents() {
      "estimatedTotalHits": 0,
      "semanticHitCount": 0
    }
-    "###);
+    "#);
 }

 #[actix_rt::test]
@@ -740,7 +741,7 @@ async fn add_remove_one_vector_4588() {
            json!({"vector": [1, 1, 1], "hybrid": {"semanticRatio": 1.0, "embedder": "manual"} }),
        )
        .await;
-    snapshot!(documents, @r###"
+    snapshot!(documents, @r#"
    {
      "hits": [
        {
@@ -755,7 +756,7 @@ async fn add_remove_one_vector_4588() {
      "estimatedTotalHits": 1,
      "semanticHitCount": 1
    }
-    "###);
+    "#);

    let (documents, _code) = index
        .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
--- a/crates/meilisearch/tests/vector/rest.rs
+++ b/crates/meilisearch/tests/vector/rest.rs
@@ -12,7 +12,7 @@ use crate::common::Value;
 use crate::json;
 use crate::vector::{get_server_vector, GetAllDocumentsOptions};

-async fn create_mock() -> (&'static MockServer, Value) {
+pub async fn create_mock() -> (&'static MockServer, Value) {
    let mock_server = Box::leak(Box::new(MockServer::start().await));

    let text_to_embedding: BTreeMap<_, _> = vec![
--- a/crates/milli/Cargo.toml
+++ b/crates/milli/Cargo.toml
@@ -109,6 +109,8 @@ utoipa = { version = "5.4.0", features = [
    "openapi_extensions",
 ] }
 lru = "0.14.0"
+boxcar = "0.2.14"
+papaya = "0.2.3"

 [dev-dependencies]
 mimalloc = { version = "0.1.47", default-features = false }
--- a/crates/milli/src/error.rs
+++ b/crates/milli/src/error.rs
@@ -639,3 +639,29 @@ fn conditionally_lookup_for_error_message() {
        assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
    }
 }
+
+pub struct DidYouMean<'a>(Option<&'a str>);
+
+impl<'a> DidYouMean<'a> {
+    pub fn new(key: &str, keys: &'a [String]) -> DidYouMean<'a> {
+        let typos = levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true).build_dfa(key);
+        for key in keys.iter() {
+            match typos.eval(key) {
+                levenshtein_automata::Distance::Exact(_) => {
+                    return DidYouMean(Some(key));
+                }
+                levenshtein_automata::Distance::AtLeast(_) => continue,
+            }
+        }
+        DidYouMean(None)
+    }
+}
+
+impl std::fmt::Display for DidYouMean<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(suggestion) = self.0 {
+            write!(f, " Did you mean `{suggestion}`?")?;
+        }
+        Ok(())
+    }
+}
--- a/crates/milli/src/filterable_attributes_rules.rs
+++ b/crates/milli/src/filterable_attributes_rules.rs
@@ -111,7 +111,7 @@ impl FilterableAttributesFeatures {
        self.filter.is_filterable_null()
    }

-    /// Check if `IS EXISTS` is allowed
+    /// Check if `EXISTS` is allowed
    pub fn is_filterable_exists(&self) -> bool {
        self.filter.is_filterable_exists()
    }
--- a/crates/milli/src/search/facet/filter.rs
+++ b/crates/milli/src/search/facet/filter.rs
@@ -12,7 +12,7 @@ use roaring::{MultiOps, RoaringBitmap};
 use serde_json::Value;

 use super::facet_range_search;
-use crate::constants::RESERVED_GEO_FIELD_NAME;
+use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
 use crate::error::{Error, UserError};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
@@ -228,6 +228,10 @@ impl<'a> Filter<'a> {
    pub fn use_contains_operator(&self) -> Option<&Token> {
        self.condition.use_contains_operator()
    }
+
+    pub fn use_vector_filter(&self) -> Option<&Token> {
+        self.condition.use_vector_filter()
+    }
 }

 impl<'a> Filter<'a> {
@@ -235,10 +239,12 @@ impl<'a> Filter<'a> {
        // to avoid doing this for each recursive call we're going to do it ONCE ahead of time
        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
+
        for fid in self.condition.fids(MAX_FILTER_DEPTH) {
            let attribute = fid.value();
            if matching_features(attribute, &filterable_attributes_rules)
                .is_some_and(|(_, features)| features.is_filterable())
+                || attribute == RESERVED_VECTORS_FIELD_NAME
            {
                continue;
            }
@@ -578,7 +584,8 @@ impl<'a> Filter<'a> {
                    .union()
            }
            FilterCondition::Condition { fid, op } => {
-                let Some(field_id) = field_ids_map.id(fid.value()) else {
+                let value = fid.value();
+                let Some(field_id) = field_ids_map.id(value) else {
                    return Ok(RoaringBitmap::new());
                };
                let Some((rule_index, features)) =
@@ -635,6 +642,9 @@ impl<'a> Filter<'a> {
                    Ok(RoaringBitmap::new())
                }
            }
+            FilterCondition::VectorExists { fid: _, embedder, filter } => {
+                super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
+            }
            FilterCondition::GeoLowerThan { point, radius } => {
                if index.is_geo_filtering_enabled(rtxn)? {
                    let base_point: [f64; 2] =
--- a/crates/milli/src/search/facet/filter_vector.rs
+++ b/crates/milli/src/search/facet/filter_vector.rs
@@ -0,0 +1,157 @@
+use filter_parser::{Token, VectorFilter};
+use roaring::{MultiOps, RoaringBitmap};
+
+use crate::error::{DidYouMean, Error};
+use crate::vector::db::IndexEmbeddingConfig;
+use crate::vector::{ArroyStats, ArroyWrapper};
+use crate::Index;
+
+#[derive(Debug, thiserror::Error)]
+pub enum VectorFilterError<'a> {
+    #[error("The embedder `{}` does not exist. {}", embedder.value(), {
+        if available.is_empty() {
+            String::from("This index does not have any configured embedders.")
+        } else {
+            let mut available = available.clone();
+            available.sort_unstable();
+            let did_you_mean = DidYouMean::new(embedder.value(), &available);
+            format!("Available embedders are: {}.{did_you_mean}", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", "))
+        }
+    })]
+    EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> },
+
+    #[error("The fragment `{}` does not exist on embedder `{}`. {}", fragment.value(), embedder.value(), {
+        if available.is_empty() {
+            String::from("This embedder does not have any configured fragments.")
+        } else {
+            let mut available = available.clone();
+            available.sort_unstable();
+            let did_you_mean = DidYouMean::new(fragment.value(), &available);
+            format!("Available fragments on this embedder are: {}.{did_you_mean}", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", "))
+        }
+    })]
+    FragmentDoesNotExist {
+        embedder: &'a Token<'a>,
+        fragment: &'a Token<'a>,
+        available: Vec<String>,
+    },
+}
+
+use VectorFilterError::*;
+
+impl<'a> From<VectorFilterError<'a>> for Error {
+    fn from(err: VectorFilterError<'a>) -> Self {
+        match &err {
+            EmbedderDoesNotExist { embedder: token, .. }
+            | FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
+        }
+    }
+}
+
+pub(super) fn evaluate(
+    rtxn: &heed::RoTxn<'_>,
+    index: &Index,
+    universe: Option<&RoaringBitmap>,
+    embedder: Option<Token<'_>>,
+    filter: &VectorFilter<'_>,
+) -> crate::Result<RoaringBitmap> {
+    let index_embedding_configs = index.embedding_configs();
+    let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
+
+    let embedders = match embedder {
+        Some(embedder) => vec![embedder],
+        None => embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect(),
+    };
+
+    let mut docids = embedders
+        .iter()
+        .map(|e| evaluate_inner(rtxn, index, e, &embedding_configs, filter))
+        .union()?;
+
+    if let Some(universe) = universe {
+        docids &= universe;
+    }
+
+    Ok(docids)
+}
+
+fn evaluate_inner(
+    rtxn: &heed::RoTxn<'_>,
+    index: &Index,
+    embedder: &Token<'_>,
+    embedding_configs: &[IndexEmbeddingConfig],
+    filter: &VectorFilter<'_>,
+) -> crate::Result<RoaringBitmap> {
+    let embedder_name = embedder.value();
+    let available_embedders =
+        || embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
+
+    let embedding_config = embedding_configs
+        .iter()
+        .find(|config| config.name == embedder_name)
+        .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
+
+    let embedder_info = index
+        .embedding_configs()
+        .embedder_info(rtxn, embedder_name)?
+        .ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
+
+    let arroy_wrapper = ArroyWrapper::new(
+        index.vector_arroy,
+        embedder_info.embedder_id,
+        embedding_config.config.quantized(),
+    );
+
+    let docids = match filter {
+        VectorFilter::Fragment(fragment) => {
+            let fragment_name = fragment.value();
+            let fragment_config = embedding_config
+                .fragments
+                .as_slice()
+                .iter()
+                .find(|fragment| fragment.name == fragment_name)
+                .ok_or_else(|| FragmentDoesNotExist {
+                    embedder,
+                    fragment,
+                    available: embedding_config
+                        .fragments
+                        .as_slice()
+                        .iter()
+                        .map(|f| f.name.clone())
+                        .collect(),
+                })?;
+
+            let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
+            arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| {
+                bitmap.clone() - user_provided_docids
+            })?
+        }
+        VectorFilter::DocumentTemplate => {
+            if !embedding_config.fragments.as_slice().is_empty() {
+                return Ok(RoaringBitmap::new());
+            }
+
+            let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
+            let mut stats = ArroyStats::default();
+            arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
+            stats.documents - user_provided_docids.clone()
+        }
+        VectorFilter::UserProvided => {
+            let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
+            user_provided_docids.clone()
+        }
+        VectorFilter::Regenerate => {
+            let mut stats = ArroyStats::default();
+            arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
+            let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
+            stats.documents - skip_regenerate
+        }
+        VectorFilter::None => {
+            let mut stats = ArroyStats::default();
+            arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
+            stats.documents
+        }
+    };
+
+    Ok(docids)
+}
--- a/crates/milli/src/search/facet/mod.rs
+++ b/crates/milli/src/search/facet/mod.rs
@@ -17,6 +17,7 @@ mod facet_range_search;
 mod facet_sort_ascending;
 mod facet_sort_descending;
 mod filter;
+mod filter_vector;
 mod search;

 fn facet_extreme_value<'t>(
--- a/crates/milli/src/search/hybrid.rs
+++ b/crates/milli/src/search/hybrid.rs
@@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
 use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
 use crate::search::new::{distinct_fid, distinct_single_docid};
 use crate::search::SemanticSearch;
-use crate::vector::SearchQuery;
+use crate::vector::{Embedding, SearchQuery};
 use crate::{Index, MatchingWords, Result, Search, SearchResult};

 struct ScoreWithRatioResult {
@@ -16,6 +16,7 @@ struct ScoreWithRatioResult {
    document_scores: Vec<(u32, ScoreWithRatio)>,
    degraded: bool,
    used_negative_operator: bool,
+    query_vector: Option<Embedding>,
 }

 type ScoreWithRatio = (Vec<ScoreDetails>, f32);
@@ -85,6 +86,7 @@ impl ScoreWithRatioResult {
            document_scores,
            degraded: results.degraded,
            used_negative_operator: results.used_negative_operator,
+            query_vector: results.query_vector,
        }
    }

@@ -186,6 +188,7 @@ impl ScoreWithRatioResult {
                degraded: vector_results.degraded | keyword_results.degraded,
                used_negative_operator: vector_results.used_negative_operator
                    | keyword_results.used_negative_operator,
+                query_vector: vector_results.query_vector,
            },
            semantic_hit_count,
        ))
@@ -209,6 +212,7 @@ impl Search<'_> {
            terms_matching_strategy: self.terms_matching_strategy,
            scoring_strategy: ScoringStrategy::Detailed,
            words_limit: self.words_limit,
+            retrieve_vectors: self.retrieve_vectors,
            exhaustive_number_hits: self.exhaustive_number_hits,
            max_total_hits: self.max_total_hits,
            rtxn: self.rtxn,
@@ -265,7 +269,7 @@ impl Search<'_> {
        };

        search.semantic = Some(SemanticSearch {
-            vector: Some(vector_query),
+            vector: Some(vector_query.clone()),
            embedder_name,
            embedder,
            quantized,
@@ -322,6 +326,7 @@ fn return_keyword_results(
        mut document_scores,
        degraded,
        used_negative_operator,
+        query_vector,
    }: SearchResult,
 ) -> (SearchResult, Option<u32>) {
    let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
@@ -348,6 +353,7 @@ fn return_keyword_results(
            document_scores,
            degraded,
            used_negative_operator,
+            query_vector,
        },
        Some(0),
    )
--- a/crates/milli/src/search/mod.rs
+++ b/crates/milli/src/search/mod.rs
@@ -52,6 +52,7 @@ pub struct Search<'a> {
    terms_matching_strategy: TermsMatchingStrategy,
    scoring_strategy: ScoringStrategy,
    words_limit: usize,
+    retrieve_vectors: bool,
    exhaustive_number_hits: bool,
    max_total_hits: Option<usize>,
    rtxn: &'a heed::RoTxn<'a>,
@@ -75,6 +76,7 @@ impl<'a> Search<'a> {
            geo_param: GeoSortParameter::default(),
            terms_matching_strategy: TermsMatchingStrategy::default(),
            scoring_strategy: Default::default(),
+            retrieve_vectors: false,
            exhaustive_number_hits: false,
            max_total_hits: None,
            words_limit: 10,
@@ -161,6 +163,11 @@ impl<'a> Search<'a> {
        self
    }

+    pub fn retrieve_vectors(&mut self, retrieve_vectors: bool) -> &mut Search<'a> {
+        self.retrieve_vectors = retrieve_vectors;
+        self
+    }
+
    /// Forces the search to exhaustively compute the number of candidates,
    /// this will increase the search time but allows finite pagination.
    pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
@@ -233,6 +240,7 @@ impl<'a> Search<'a> {
        }

        let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
+        let mut query_vector = None;
        let PartialSearchResult {
            located_query_terms,
            candidates,
@@ -247,24 +255,29 @@ impl<'a> Search<'a> {
                embedder,
                quantized,
                media: _,
-            }) => execute_vector_search(
-                &mut ctx,
-                vector,
-                self.scoring_strategy,
-                self.exhaustive_number_hits,
-                self.max_total_hits,
-                universe,
-                &self.sort_criteria,
-                &self.distinct,
-                self.geo_param,
-                self.offset,
-                self.limit,
-                embedder_name,
-                embedder,
-                *quantized,
-                self.time_budget.clone(),
-                self.ranking_score_threshold,
-            )?,
+            }) => {
+                if self.retrieve_vectors {
+                    query_vector = Some(vector.clone());
+                }
+                execute_vector_search(
+                    &mut ctx,
+                    vector,
+                    self.scoring_strategy,
+                    self.exhaustive_number_hits,
+                    self.max_total_hits,
+                    universe,
+                    &self.sort_criteria,
+                    &self.distinct,
+                    self.geo_param,
+                    self.offset,
+                    self.limit,
+                    embedder_name,
+                    embedder,
+                    *quantized,
+                    self.time_budget.clone(),
+                    self.ranking_score_threshold,
+                )?
+            }
            _ => execute_search(
                &mut ctx,
                self.query.as_deref(),
@@ -306,6 +319,7 @@ impl<'a> Search<'a> {
            documents_ids,
            degraded,
            used_negative_operator,
+            query_vector,
        })
    }
 }
@@ -324,6 +338,7 @@ impl fmt::Debug for Search<'_> {
            terms_matching_strategy,
            scoring_strategy,
            words_limit,
+            retrieve_vectors,
            exhaustive_number_hits,
            max_total_hits,
            rtxn: _,
@@ -344,6 +359,7 @@ impl fmt::Debug for Search<'_> {
            .field("searchable_attributes", searchable_attributes)
            .field("terms_matching_strategy", terms_matching_strategy)
            .field("scoring_strategy", scoring_strategy)
+            .field("retrieve_vectors", retrieve_vectors)
            .field("exhaustive_number_hits", exhaustive_number_hits)
            .field("max_total_hits", max_total_hits)
            .field("words_limit", words_limit)
@@ -366,6 +382,7 @@ pub struct SearchResult {
    pub document_scores: Vec<Vec<ScoreDetails>>,
    pub degraded: bool,
    pub used_negative_operator: bool,
+    pub query_vector: Option<Embedding>,
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
--- a/crates/milli/src/search/similar.rs
+++ b/crates/milli/src/search/similar.rs
@@ -130,6 +130,7 @@ impl<'a> Similar<'a> {
            document_scores,
            degraded: false,
            used_negative_operator: false,
+            query_vector: None,
        })
    }
 }
--- a/crates/milli/src/test_index.rs
+++ b/crates/milli/src/test_index.rs
@@ -1097,6 +1097,7 @@ fn bug_3021_fourth() {
        mut documents_ids,
        degraded: _,
        used_negative_operator: _,
+        query_vector: _,
    } = search.execute().unwrap();
    let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap();
    documents_ids.sort_unstable();
@@ -1338,10 +1339,9 @@ fn vectors_are_never_indexed_as_searchable_or_filterable() {
    assert!(results.candidates.is_empty());

    let mut search = index.search(&rtxn);
-    let results = search
-        .filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
-        .execute()
-        .unwrap();
+    let results =
+        dbg!(search.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap()).execute())
+            .unwrap();
    assert!(results.candidates.is_empty());

    index
--- a/crates/milli/src/update/new/indexer/document_operation.rs
+++ b/crates/milli/src/update/new/indexer/document_operation.rs
@@ -1,11 +1,14 @@
+use std::cell::RefCell;
 use std::sync::atomic::Ordering;
+use std::sync::{OnceLock, RwLock};

 use bumpalo::collections::CollectIn;
 use bumpalo::Bump;
 use bumparaw_collections::RawMap;
 use hashbrown::hash_map::Entry;
-use heed::RoTxn;
+use heed::{RoTxn, WithoutTls};
 use memmap2::Mmap;
+use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
 use rayon::slice::ParallelSlice;
 use rustc_hash::FxBuildHasher;
 use serde_json::value::RawValue;
@@ -20,8 +23,11 @@ use crate::update::new::document::{DocumentContext, Versions};
 use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::MostlySend;
 use crate::update::new::{DocumentIdentifiers, Insertion, Update};
-use crate::update::{AvailableIds, IndexDocumentsMethod};
-use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError};
+use crate::update::{ConcurrentAvailableIds, IndexDocumentsMethod};
+use crate::{
+    DocumentId, Error, FieldIdMapWithMetadata, FieldsIdsMap, GlobalFieldsIdsMap, Index,
+    InternalError, MetadataBuilder, Result, UserError,
+};

 #[derive(Default)]
 pub struct DocumentOperation<'pl> {
@@ -62,90 +68,108 @@ impl<'pl> DocumentOperation<'pl> {

    #[allow(clippy::too_many_arguments)]
    #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
-    pub fn into_changes<MSP>(
+    pub fn into_changes<'e: 'pl, MSP>(
        self,
        indexer: &'pl Bump,
-        index: &Index,
-        rtxn: &'pl RoTxn<'pl>,
+        index: &'e Index,
+        rtxn: &'pl RoTxn<'pl, WithoutTls>,
        primary_key_from_op: Option<&'pl str>,
-        new_fields_ids_map: &mut FieldsIdsMap,
+        new_fid_map: &mut FieldsIdsMap,
        must_stop_processing: &MSP,
        progress: Progress,
    ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
    where
-        MSP: Fn() -> bool,
+        MSP: Fn() -> bool + Sync,
    {
        progress.update_progress(IndexingStep::PreparingPayloads);
        let Self { operations } = self;

+        let metadata_builder = MetadataBuilder::from_index(index, rtxn)?;
+        let fid_map_with_meta = FieldIdMapWithMetadata::new(new_fid_map.clone(), metadata_builder);
+        let global = RwLock::new(fid_map_with_meta);
+        let gfid_map = GlobalFieldsIdsMap::new(&global);
+
        let documents_ids = index.documents_ids(rtxn)?;
-        let mut operations_stats = Vec::new();
-        let mut available_docids = AvailableIds::new(&documents_ids);
-        let mut docids_version_offsets = hashbrown::HashMap::new();
-        let mut primary_key = None;
+        let available_docids = ConcurrentAvailableIds::new(documents_ids);
+        let docids_version_offsets = papaya::HashMap::new();
+        // let mut docids_version_offsets = hashbrown::HashMap::new();
+        let primary_key = OnceLock::new();

        let payload_count = operations.len();
        let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
        progress.update_progress(progress_step);

-        for (payload_index, operation) in operations.into_iter().enumerate() {
-            if must_stop_processing() {
-                return Err(InternalError::AbortedIndexation.into());
-            }
-            step.store(payload_index as u32, Ordering::Relaxed);
-
-            let mut bytes = 0;
-            let result = match operation {
-                Payload::Replace(payload) => extract_addition_payload_changes(
-                    indexer,
-                    index,
-                    rtxn,
-                    primary_key_from_op,
-                    &mut primary_key,
-                    new_fields_ids_map,
-                    &mut available_docids,
-                    &mut bytes,
-                    &docids_version_offsets,
-                    IndexDocumentsMethod::ReplaceDocuments,
-                    payload,
-                ),
-                Payload::Update(payload) => extract_addition_payload_changes(
-                    indexer,
-                    index,
-                    rtxn,
-                    primary_key_from_op,
-                    &mut primary_key,
-                    new_fields_ids_map,
-                    &mut available_docids,
-                    &mut bytes,
-                    &docids_version_offsets,
-                    IndexDocumentsMethod::UpdateDocuments,
-                    payload,
-                ),
-                Payload::Deletion(to_delete) => extract_deletion_payload_changes(
-                    index,
-                    rtxn,
-                    &mut available_docids,
-                    &docids_version_offsets,
-                    to_delete,
-                ),
-            };
-
-            let mut document_count = 0;
-            let error = match result {
-                Ok(new_docids_version_offsets) => {
-                    document_count = new_docids_version_offsets.len() as u64;
-                    // If we don't have any error then we can merge the content of this payload
-                    // into to main payload. Else we just drop this payload extraction.
-                    merge_version_offsets(&mut docids_version_offsets, new_docids_version_offsets);
-                    None
+        let long_txn_id = rtxn.id();
+        let operations_stats = operations
+            .into_par_iter()
+            .enumerate()
+            .map(|(payload_index, operation)| {
+                if must_stop_processing() {
+                    return Err(InternalError::AbortedIndexation.into());
                }
-                Err(Error::UserError(user_error)) => Some(user_error),
-                Err(e) => return Err(e),
-            };
-            operations_stats.push(PayloadStats { document_count, bytes, error });
-        }
-        step.store(payload_count as u32, Ordering::Relaxed);
+                step.fetch_add(1, Ordering::Relaxed);
+
+                let short = index.read_txn()?;
+                // SAFETY: The long_txn_id comes from the main rtxn and the long lifetime is the one of the long rtxn.
+                let rtxn: &'pl RoTxn<'e, _> = unsafe { extend_rtxn_lifetime(long_txn_id, &short) };
+                let indexer = bumpalo::Bump::new();
+                let mut gfid_map = gfid_map.clone();
+
+                let mut bytes = 0;
+                let result = match operation {
+                    Payload::Replace(payload) => extract_addition_payload_changes(
+                        &indexer,
+                        index,
+                        &rtxn,
+                        primary_key_from_op,
+                        &primary_key,
+                        &mut gfid_map,
+                        &available_docids,
+                        &mut bytes,
+                        &docids_version_offsets,
+                        IndexDocumentsMethod::ReplaceDocuments,
+                        payload,
+                    ),
+                    Payload::Update(payload) => extract_addition_payload_changes(
+                        &indexer,
+                        index,
+                        &rtxn,
+                        primary_key_from_op,
+                        &primary_key,
+                        &mut gfid_map,
+                        &available_docids,
+                        &mut bytes,
+                        &docids_version_offsets,
+                        IndexDocumentsMethod::UpdateDocuments,
+                        payload,
+                    ),
+                    Payload::Deletion(to_delete) => extract_deletion_payload_changes(
+                        index,
+                        &rtxn,
+                        &available_docids,
+                        &docids_version_offsets,
+                        to_delete,
+                    ),
+                };
+
+                match result {
+                    Ok(new_docids_version_offsets) => {
+                        let document_count = new_docids_version_offsets.len() as u64;
+                        // If we don't have any error then we can merge the content of this payload
+                        // into to main payload. Else we just drop this payload extraction.
+                        merge_version_offsets(
+                            &mut docids_version_offsets,
+                            new_docids_version_offsets,
+                        );
+                        Ok(PayloadStats { document_count, bytes, error: None })
+                    }
+                    Err(Error::UserError(user_error)) => {
+                        Ok(PayloadStats { document_count: 0, bytes, error: Some(user_error) })
+                    }
+                    Err(e) => Err(e),
+                }
+            })
+            .collect::<Result<Vec<_>>>()?;

        // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
        let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
@@ -157,21 +181,25 @@ impl<'pl> DocumentOperation<'pl> {
            .sort_unstable_by_key(|(_, po)| first_update_pointer(&po.operations).unwrap_or(0));

        let docids_version_offsets = docids_version_offsets.into_bump_slice();
-        Ok((DocumentOperationChanges { docids_version_offsets }, operations_stats, primary_key))
+        Ok((
+            DocumentOperationChanges { docids_version_offsets },
+            operations_stats,
+            primary_key.into_inner(),
+        ))
    }
 }

 #[allow(clippy::too_many_arguments)]
 fn extract_addition_payload_changes<'r, 'pl: 'r>(
-    indexer: &'pl Bump,
+    indexer: &Bump,
    index: &Index,
    rtxn: &'r RoTxn<'r>,
    primary_key_from_op: Option<&'r str>,
-    primary_key: &mut Option<PrimaryKey<'r>>,
-    new_fields_ids_map: &mut FieldsIdsMap,
-    available_docids: &mut AvailableIds,
+    primary_key: &OnceLock<PrimaryKey<'r>>,
+    new_fields_ids_map: &mut GlobalFieldsIdsMap,
+    available_docids: &ConcurrentAvailableIds,
    bytes: &mut u64,
-    main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>,
+    main_docids_version_offsets: &papaya::HashMap<&'pl str, PayloadOperations<'pl>>,
    method: IndexDocumentsMethod,
    payload: &'pl [u8],
 ) -> Result<hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>> {
@@ -204,10 +232,10 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
                Err(error) => return Err(error),
            };

-            primary_key.get_or_insert(pk)
+            primary_key.get_or_init(|| pk)
        } else {
            // primary key was retrieved in the first iteration or in a previous payload
-            primary_key.as_ref().unwrap()
+            primary_key.get().unwrap()
        };

        let external_id =
@@ -313,12 +341,12 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
        );
        match result {
            Ok(Ok((pk, _))) => {
-                primary_key.get_or_insert(pk);
+                primary_key.get_or_init(|| pk);
            }
            Ok(Err(UserError::NoPrimaryKeyCandidateFound)) => (),
            Ok(Err(user_error)) => return Err(Error::UserError(user_error)),
            Err(error) => return Err(error),
-        };
+        }
    }

    Ok(new_docids_version_offsets)
@@ -327,7 +355,7 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
 fn extract_deletion_payload_changes<'s, 'pl: 's>(
    index: &Index,
    rtxn: &RoTxn,
-    available_docids: &mut AvailableIds,
+    available_docids: &ConcurrentAvailableIds,
    main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>,
    to_delete: &'pl [&'pl str],
 ) -> Result<hashbrown::HashMap<&'s str, PayloadOperations<'pl>>> {
@@ -612,3 +640,21 @@ pub fn first_update_pointer(docops: &[InnerDocOp]) -> Option<usize> {
        InnerDocOp::Deletion => None,
    })
 }
+
+/// Extends the lifetime of a read-only transaction.
+/// The `long_txn_id` transaction ID must come from a call to the `RoTxn::id` method.
+///
+/// ## Panics
+///
+/// Panics if the long transaction ID does not match the transaction ID of the short transaction.
+pub unsafe fn extend_rtxn_lifetime<'e, 'long, 'short>(
+    long_txn_id: usize,
+    short: &'short RoTxn<'e, WithoutTls>,
+) -> &'long RoTxn<'e, WithoutTls> {
+    assert_eq!(
+        long_txn_id,
+        short.id(),
+        "Lifetimes can only be extended if they have the same transaction ID"
+    );
+    unsafe { std::mem::transmute(short) }
+}
--- a/crates/milli/src/update/new/indexer/guess_primary_key.rs
+++ b/crates/milli/src/update/new/indexer/guess_primary_key.rs
@@ -4,7 +4,7 @@ use rustc_hash::FxBuildHasher;

 use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
 use crate::update::new::StdResult;
-use crate::{FieldsIdsMap, Index, Result, UserError};
+use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, Result, UserError};

 /// Returns the primary key that has already been set for this index or the
 /// one we will guess by searching for the first key that contains "id" as a substring,
@@ -12,7 +12,7 @@ use crate::{FieldsIdsMap, Index, Result, UserError};
 pub fn retrieve_or_guess_primary_key<'a>(
    rtxn: &'a RoTxn<'a>,
    index: &Index,
-    new_fields_ids_map: &mut FieldsIdsMap,
+    new_fields_ids_map: &mut GlobalFieldsIdsMap, // Use a &mut Mapper: MutFieldIdMapper
    primary_key_from_op: Option<&'a str>,
    first_document: Option<RawMap<'a, FxBuildHasher>>,
 ) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
@@ -47,7 +47,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
            let guesses: Result<Vec<&str>> = first_document
                .keys()
                .filter_map(|name| {
-                    let Some(_) = new_fields_ids_map.insert(name) else {
+                    let Some(_) = new_fields_ids_map.id_or_insert(name) else {
                        return Some(Err(UserError::AttributeLimitReached.into()));
                    };
                    name.to_lowercase().ends_with(DEFAULT_PRIMARY_KEY).then_some(Ok(name))
--- a/crates/milli/src/vector/db.rs
+++ b/crates/milli/src/vector/db.rs
@@ -128,6 +128,7 @@ impl EmbeddingStatus {
    pub fn is_user_provided(&self, docid: DocumentId) -> bool {
        self.user_provided.contains(docid)
    }
+
    /// Whether vectors should be regenerated for that document and that embedder.
    pub fn must_regenerate(&self, docid: DocumentId) -> bool {
        let invert = self.skip_regenerate_different_from_user_provided.contains(docid);
--- a/crates/milli/src/vector/mod.rs
+++ b/crates/milli/src/vector/mod.rs
@@ -556,9 +556,6 @@ impl ArroyWrapper {
            for reader in self.readers(rtxn, self.quantized_db()) {
                let reader = reader?;
                let documents = reader.item_ids();
-                if documents.is_empty() {
-                    break;
-                }
                stats.documents |= documents;
                stats.number_of_embeddings += documents.len();
            }
@@ -566,9 +563,6 @@ impl ArroyWrapper {
            for reader in self.readers(rtxn, self.angular_db()) {
                let reader = reader?;
                let documents = reader.item_ids();
-                if documents.is_empty() {
-                    break;
-                }
                stats.documents |= documents;
                stats.number_of_embeddings += documents.len();
            }
Author	SHA1	Message	Date
Clément Renault	c4cedb5390	WIP: Introducing papaya	2025-08-19 12:10:03 +02:00
Clément Renault	0b40892eca	WIP: Ooops, I did some unsafe	2025-08-19 11:49:30 +02:00
Clément Renault	7ddd8a2b66	WIP: I need multiple rtxns and will probably use a bit of unsafe	2025-08-14 17:34:52 +02:00
Clément Renault	2addedf98a	WIP: parallelizing	2025-08-14 15:16:26 +02:00
Tamo	a608e57c3c	Merge pull request #5741 from meilisearch/fragment-filters Vector filters	2025-08-13 16:39:02 +00:00
Mubelotix	398efa3c55	Merge branch 'main' into fragment-filters	2025-08-13 17:27:09 +02:00
Mubelotix	307ea38c2a	Remove old irrelevant tests	2025-08-13 17:19:37 +02:00
Mubelotix	cdeca59587	Add error message for quoting errors	2025-08-13 17:14:36 +02:00
Mubelotix	8529e2161a	Clarify more errors	2025-08-13 13:37:19 +02:00
Mubelotix	b80869f2be	Add two other "did you mean" messages	2025-08-13 13:16:25 +02:00
Mubelotix	666ae1a3e7	Add "did you mean" message	2025-08-13 13:00:38 +02:00
Mubelotix	f6559258ce	Improve operation error on vector filters	2025-08-13 10:32:28 +02:00
Mubelotix	b5ba0e42b3	Add new error	2025-08-13 09:58:16 +02:00
Mubelotix	300f5ce0f4	Merge pull request #5778 from meilisearch/retrieve-query-vectors Return query vector	2025-08-13 07:39:15 +00:00
Mubelotix	3f20c1aa5d	Merge branch 'main' into retrieve-query-vectors	2025-08-11 13:01:27 +02:00
Mubelotix	5df125cbb7	Format	2025-08-07 09:31:05 +02:00
Mubelotix	74992560b0	Simplify conditions	2025-08-07 09:28:45 +02:00
Mubelotix	c385cf985b	Fix tests	2025-08-05 15:55:31 +02:00
Mubelotix	2121819c66	Fix tests	2025-08-05 14:18:45 +02:00
Mubelotix	2f33cd5f0a	Merge branch 'main' into fragment-filters	2025-08-05 14:05:15 +02:00
Mubelotix	2f5101a1e4	Merge branch 'main' into retrieve-query-vectors	2025-08-05 14:02:25 +02:00
Mubelotix	be045a7636	Merge branch 'release-v1.16.0' into fragment-filters	2025-08-01 09:04:12 +02:00
Mubelotix	60acdf8574	Fix grammar Co-Authored-By: Louis Dureuil <louis.dureuil@xinra.net>	2025-07-29 11:05:16 +02:00
Mubelotix	93864009cc	Rename variable with typo Co-Authored-By: Louis Dureuil <louis.dureuil@xinra.net>	2025-07-29 11:04:08 +02:00
Mubelotix	223df5a433	Remove incorrect break	2025-07-29 11:02:59 +02:00
Mubelotix	3580b3a4ef	Remove userProvided from fragments	2025-07-29 10:56:54 +02:00
Mubelotix	66b6e47494	Remove warning	2025-07-29 10:52:21 +02:00
Mubelotix	6c3dd83ae5	Fix old test	2025-07-29 09:03:48 +02:00
Mubelotix	10567b150c	Continue updating tests	2025-07-25 14:25:35 +02:00
Mubelotix	a439f57d70	Update tests	2025-07-25 13:41:31 +02:00
Mubelotix	d243504296	Improve test	2025-07-25 11:58:34 +02:00
Mubelotix	a7fe2abca4	Implement for multi-search	2025-07-25 11:45:51 +02:00
Mubelotix	26da478b5b	Add query vector to response	2025-07-24 17:27:49 +02:00
Mubelotix	13d38d59bf	Remove useless import	2025-07-24 15:44:11 +02:00
Mubelotix	4264abda23	Remove debugs	2025-07-24 15:30:36 +02:00
Mubelotix	dbb670a9ee	Remove old split function	2025-07-24 15:28:58 +02:00
Mubelotix	a92e36ab83	Small improvements	2025-07-24 15:28:17 +02:00
Mubelotix	ad06828685	Add tests on parser	2025-07-24 15:24:42 +02:00
Mubelotix	8f1b697b91	Format	2025-07-24 14:57:06 +02:00
Mubelotix	bb4d573862	Switch to a nom parser	2025-07-24 14:56:35 +02:00
Mubelotix	aa5a1f333a	Refactor to support less combinations	2025-07-23 15:33:17 +02:00
Mubelotix	776e55d209	Improve code readability	2025-07-22 11:37:21 +02:00
Mubelotix	3362fb8476	Remove print	2025-07-22 11:21:06 +02:00
Mubelotix	6d93b36279	Format	2025-07-22 11:18:41 +02:00
Mubelotix	982e989886	Test regenerate filter	2025-07-22 11:10:05 +02:00
Mubelotix	0014ed3114	Apply review suggestions	2025-07-22 10:56:05 +02:00
Mubelotix	ab07e9480e	Resolve post-merge issues	2025-07-21 18:22:10 +02:00
Mubelotix	00e957051e	Merge remote-tracking branch 'origin/release-v1.16.0' into fragment-filters	2025-07-21 18:19:45 +02:00
Mubelotix	f244439b4f	Revert "Format" This reverts commit `30fd546c12`.	2025-07-10 16:43:45 +02:00
Mubelotix	30fd546c12	Format	2025-07-10 16:43:10 +02:00
Mubelotix	a930977460	Fix test	2025-07-10 09:37:58 +02:00
Mubelotix	a3b8c2b71f	Gate behind multimodal experimental feature	2025-07-09 18:21:52 +02:00
Mubelotix	39f808714d	Implement a documentTemplate filter	2025-07-09 18:03:32 +02:00
Mubelotix	8adf6141e0	Fix old test	2025-07-08 16:55:43 +02:00
Mubelotix	df3f282e4d	Merge branch 'request-fragments-test' into fragment-filters	2025-07-08 16:35:14 +02:00
Mubelotix	d81855015b	Add test	2025-07-08 16:23:45 +02:00
Mubelotix	feb53104e5	Grammar	2025-07-08 16:19:55 +02:00
Mubelotix	881c37393f	Add telemetry	2025-07-08 16:06:27 +02:00
Mubelotix	9e98a25e45	Fix clippy	2025-07-08 15:56:09 +02:00
Mubelotix	fb73b83abe	Fix performance	2025-07-08 12:14:34 +02:00
Mubelotix	29b74424ad	Clean code	2025-07-08 12:03:32 +02:00
Mubelotix	b4cafec8b3	Add tests for operators along vector filter	2025-07-08 11:56:19 +02:00
Mubelotix	d43cd40807	Split tests	2025-07-08 11:48:23 +02:00
Mubelotix	0301d8f239	Improve error handling	2025-07-08 11:39:10 +02:00
Mubelotix	2d45124d9b	Fix parsing	2025-07-08 10:01:50 +02:00
Mubelotix	40e7284d70	Add tests	2025-07-08 10:01:35 +02:00
Mubelotix	4d8d34cc93	Merge branch 'request-fragments-test' into fragment-filters	2025-07-07 18:45:34 +02:00
Mubelotix	5cced0af02	Prevent having both a fragment name and userProvided	2025-07-07 18:41:03 +02:00
Mubelotix	9c60e9689f	Support not specifying an embedder in the vector filter	2025-07-07 18:34:24 +02:00
Mubelotix	2052537681	Implement core filter logic	2025-07-07 15:28:35 +02:00
Mubelotix	a9bb64c55a	Unrelated minor fixes	2025-07-07 15:28:10 +02:00