mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-06 20:56:31 +00:00
Merge pull request #5741 from meilisearch/fragment-filters
Vector filters
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2031,6 +2031,7 @@ name = "filter-parser"
|
|||||||
version = "1.17.1"
|
version = "1.17.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
|
"levenshtein_automata",
|
||||||
"nom",
|
"nom",
|
||||||
"nom_locate",
|
"nom_locate",
|
||||||
"unescaper",
|
"unescaper",
|
||||||
|
@ -15,6 +15,7 @@ license.workspace = true
|
|||||||
nom = "7.1.3"
|
nom = "7.1.3"
|
||||||
nom_locate = "4.2.0"
|
nom_locate = "4.2.0"
|
||||||
unescaper = "0.1.6"
|
unescaper = "0.1.6"
|
||||||
|
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
# fixed version due to format breakages in v1.40
|
# fixed version due to format breakages in v1.40
|
||||||
|
@ -7,11 +7,22 @@
|
|||||||
|
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
|
use nom::character::complete::char;
|
||||||
|
use nom::character::complete::multispace0;
|
||||||
use nom::character::complete::multispace1;
|
use nom::character::complete::multispace1;
|
||||||
use nom::combinator::cut;
|
use nom::combinator::cut;
|
||||||
|
use nom::combinator::map;
|
||||||
|
use nom::combinator::value;
|
||||||
|
use nom::sequence::preceded;
|
||||||
use nom::sequence::{terminated, tuple};
|
use nom::sequence::{terminated, tuple};
|
||||||
use Condition::*;
|
use Condition::*;
|
||||||
|
|
||||||
|
use crate::error::IResultExt;
|
||||||
|
use crate::value::parse_vector_value;
|
||||||
|
use crate::value::parse_vector_value_cut;
|
||||||
|
use crate::Error;
|
||||||
|
use crate::ErrorKind;
|
||||||
|
use crate::VectorFilter;
|
||||||
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
@ -113,6 +124,83 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
|
|||||||
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
|
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_vectors(input: Span) -> IResult<(Token, Option<Token>, VectorFilter<'_>)> {
|
||||||
|
let (input, _) = multispace0(input)?;
|
||||||
|
let (input, fid) = tag("_vectors")(input)?;
|
||||||
|
|
||||||
|
if let Ok((input, _)) = multispace1::<_, crate::Error>(input) {
|
||||||
|
return Ok((input, (Token::from(fid), None, VectorFilter::None)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let (input, _) = char('.')(input)?;
|
||||||
|
|
||||||
|
// From this point, we are certain this is a vector filter, so our errors must be final.
|
||||||
|
// We could use nom's `cut` but it's better to be explicit about the errors
|
||||||
|
|
||||||
|
if let Ok((_, space)) = tag::<_, _, ()>(" ")(input) {
|
||||||
|
return Err(crate::Error::failure_from_kind(space, ErrorKind::VectorFilterMissingEmbedder));
|
||||||
|
}
|
||||||
|
|
||||||
|
let (input, embedder_name) =
|
||||||
|
parse_vector_value_cut(input, ErrorKind::VectorFilterInvalidEmbedder)?;
|
||||||
|
|
||||||
|
let (input, filter) = alt((
|
||||||
|
map(
|
||||||
|
preceded(tag(".fragments"), |input| {
|
||||||
|
let (input, _) = tag(".")(input).map_cut(ErrorKind::VectorFilterMissingFragment)?;
|
||||||
|
parse_vector_value_cut(input, ErrorKind::VectorFilterInvalidFragment)
|
||||||
|
}),
|
||||||
|
VectorFilter::Fragment,
|
||||||
|
),
|
||||||
|
value(VectorFilter::UserProvided, tag(".userProvided")),
|
||||||
|
value(VectorFilter::DocumentTemplate, tag(".documentTemplate")),
|
||||||
|
value(VectorFilter::Regenerate, tag(".regenerate")),
|
||||||
|
value(VectorFilter::None, nom::combinator::success("")),
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
if let Ok((input, point)) = tag::<_, _, ()>(".")(input) {
|
||||||
|
let opt_value = parse_vector_value(input).ok().map(|(_, v)| v);
|
||||||
|
let value =
|
||||||
|
opt_value.as_ref().map(|v| v.value().to_owned()).unwrap_or_else(|| point.to_string());
|
||||||
|
let context = opt_value.map(|v| v.original_span()).unwrap_or(point);
|
||||||
|
let previous_kind = match filter {
|
||||||
|
VectorFilter::Fragment(_) => Some("fragments"),
|
||||||
|
VectorFilter::DocumentTemplate => Some("documentTemplate"),
|
||||||
|
VectorFilter::UserProvided => Some("userProvided"),
|
||||||
|
VectorFilter::Regenerate => Some("regenerate"),
|
||||||
|
VectorFilter::None => None,
|
||||||
|
};
|
||||||
|
return Err(Error::failure_from_kind(
|
||||||
|
context,
|
||||||
|
ErrorKind::VectorFilterUnknownSuffix(previous_kind, value),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let (input, _) = multispace1(input).map_cut(ErrorKind::VectorFilterLeftover)?;
|
||||||
|
|
||||||
|
Ok((input, (Token::from(fid), Some(embedder_name), filter)))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// vectors_exists = vectors ("EXISTS" | ("NOT" WS+ "EXISTS"))
|
||||||
|
pub fn parse_vectors_exists(input: Span) -> IResult<FilterCondition> {
|
||||||
|
let (input, (fid, embedder, filter)) = parse_vectors(input)?;
|
||||||
|
|
||||||
|
// Try parsing "EXISTS" first
|
||||||
|
if let Ok((input, _)) = tag::<_, _, ()>("EXISTS")(input) {
|
||||||
|
return Ok((input, FilterCondition::VectorExists { fid, embedder, filter }));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try parsing "NOT EXISTS"
|
||||||
|
if let Ok((input, _)) = tuple::<_, _, (), _>((tag("NOT"), multispace1, tag("EXISTS")))(input) {
|
||||||
|
return Ok((
|
||||||
|
input,
|
||||||
|
FilterCondition::Not(Box::new(FilterCondition::VectorExists { fid, embedder, filter })),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(crate::Error::failure_from_kind(input, ErrorKind::VectorFilterOperation))
|
||||||
|
}
|
||||||
|
|
||||||
/// contains = value "CONTAINS" value
|
/// contains = value "CONTAINS" value
|
||||||
pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
|
pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, (fid, contains, value)) =
|
let (input, (fid, contains, value)) =
|
||||||
|
@ -42,6 +42,23 @@ pub fn cut_with_err<'a, O>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub trait IResultExt<'a> {
|
||||||
|
fn map_cut(self, kind: ErrorKind<'a>) -> Self;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> IResultExt<'a> for IResult<'a, T> {
|
||||||
|
fn map_cut(self, kind: ErrorKind<'a>) -> Self {
|
||||||
|
self.map_err(move |e: nom::Err<Error<'a>>| {
|
||||||
|
let input = match e {
|
||||||
|
nom::Err::Incomplete(_) => return e,
|
||||||
|
nom::Err::Error(e) => *e.context(),
|
||||||
|
nom::Err::Failure(e) => *e.context(),
|
||||||
|
};
|
||||||
|
Error::failure_from_kind(input, kind)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Error<'a> {
|
pub struct Error<'a> {
|
||||||
context: Span<'a>,
|
context: Span<'a>,
|
||||||
@ -61,6 +78,14 @@ pub enum ErrorKind<'a> {
|
|||||||
GeoBoundingBox,
|
GeoBoundingBox,
|
||||||
MisusedGeoRadius,
|
MisusedGeoRadius,
|
||||||
MisusedGeoBoundingBox,
|
MisusedGeoBoundingBox,
|
||||||
|
VectorFilterLeftover,
|
||||||
|
VectorFilterInvalidQuotes,
|
||||||
|
VectorFilterMissingEmbedder,
|
||||||
|
VectorFilterInvalidEmbedder,
|
||||||
|
VectorFilterMissingFragment,
|
||||||
|
VectorFilterInvalidFragment,
|
||||||
|
VectorFilterUnknownSuffix(Option<&'static str>, String),
|
||||||
|
VectorFilterOperation,
|
||||||
InvalidPrimary,
|
InvalidPrimary,
|
||||||
InvalidEscapedNumber,
|
InvalidEscapedNumber,
|
||||||
ExpectedEof,
|
ExpectedEof,
|
||||||
@ -91,6 +116,10 @@ impl<'a> Error<'a> {
|
|||||||
Self { context, kind }
|
Self { context, kind }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn failure_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> nom::Err<Self> {
|
||||||
|
nom::Err::Failure(Self::new_from_kind(context, kind))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
|
pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
|
||||||
Self::new_from_kind(context, ErrorKind::External(error.to_string()))
|
Self::new_from_kind(context, ErrorKind::External(error.to_string()))
|
||||||
}
|
}
|
||||||
@ -128,6 +157,20 @@ impl Display for Error<'_> {
|
|||||||
// first line being the diagnostic and the second line being the incriminated filter.
|
// first line being the diagnostic and the second line being the incriminated filter.
|
||||||
let escaped_input = input.escape_debug();
|
let escaped_input = input.escape_debug();
|
||||||
|
|
||||||
|
fn key_suggestion<'a>(key: &str, keys: &[&'a str]) -> Option<&'a str> {
|
||||||
|
let typos =
|
||||||
|
levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true).build_dfa(key);
|
||||||
|
for key in keys.iter() {
|
||||||
|
match typos.eval(key) {
|
||||||
|
levenshtein_automata::Distance::Exact(_) => {
|
||||||
|
return Some(key);
|
||||||
|
}
|
||||||
|
levenshtein_automata::Distance::AtLeast(_) => continue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
match &self.kind {
|
match &self.kind {
|
||||||
ErrorKind::ExpectedValue(_) if input.trim().is_empty() => {
|
ErrorKind::ExpectedValue(_) if input.trim().is_empty() => {
|
||||||
writeln!(f, "Was expecting a value but instead got nothing.")?
|
writeln!(f, "Was expecting a value but instead got nothing.")?
|
||||||
@ -169,6 +212,44 @@ impl Display for Error<'_> {
|
|||||||
ErrorKind::MisusedGeoBoundingBox => {
|
ErrorKind::MisusedGeoBoundingBox => {
|
||||||
writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")?
|
writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")?
|
||||||
}
|
}
|
||||||
|
ErrorKind::VectorFilterLeftover => {
|
||||||
|
writeln!(f, "The vector filter has leftover tokens.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterUnknownSuffix(_, value) if value.as_str() == "." => {
|
||||||
|
writeln!(f, "Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.")?;
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterUnknownSuffix(None, value) if ["fragments", "userProvided", "documentTemplate", "regenerate"].contains(&value.as_str()) => {
|
||||||
|
// This will happen with "_vectors.rest.\"userProvided\"" for instance
|
||||||
|
writeln!(f, "Was expecting this part to be unquoted.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterUnknownSuffix(None, value) => {
|
||||||
|
if let Some(suggestion) = key_suggestion(value, &["fragments", "userProvided", "documentTemplate", "regenerate"]) {
|
||||||
|
writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`. Did you mean `{suggestion}`?")?;
|
||||||
|
} else {
|
||||||
|
writeln!(f, "Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `{value}`.")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterUnknownSuffix(Some(previous_filter_kind), value) => {
|
||||||
|
writeln!(f, "Vector filter can only accept one of `fragments`, `userProvided`, `documentTemplate` or `regenerate`, but found both `{previous_filter_kind}` and `{value}`.")?
|
||||||
|
},
|
||||||
|
ErrorKind::VectorFilterInvalidFragment => {
|
||||||
|
writeln!(f, "The vector filter's fragment name is invalid.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterMissingFragment => {
|
||||||
|
writeln!(f, "The vector filter is missing a fragment name.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterMissingEmbedder => {
|
||||||
|
writeln!(f, "Was expecting embedder name but found nothing.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterInvalidEmbedder => {
|
||||||
|
writeln!(f, "The vector filter's embedder name is invalid.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterOperation => {
|
||||||
|
writeln!(f, "Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.")?
|
||||||
|
}
|
||||||
|
ErrorKind::VectorFilterInvalidQuotes => {
|
||||||
|
writeln!(f, "The quotes in one of the values are inconsistent.")?
|
||||||
|
}
|
||||||
ErrorKind::ReservedKeyword(word) => {
|
ErrorKind::ReservedKeyword(word) => {
|
||||||
writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
|
writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
|
||||||
}
|
}
|
||||||
|
@ -65,6 +65,9 @@ use nom_locate::LocatedSpan;
|
|||||||
pub(crate) use value::parse_value;
|
pub(crate) use value::parse_value;
|
||||||
use value::word_exact;
|
use value::word_exact;
|
||||||
|
|
||||||
|
use crate::condition::parse_vectors_exists;
|
||||||
|
use crate::error::IResultExt;
|
||||||
|
|
||||||
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
||||||
|
|
||||||
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
||||||
@ -136,6 +139,15 @@ impl<'a> From<&'a str> for Token<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Suffix that may follow the embedder name in a `_vectors.<embedder>` filter path.
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
pub enum VectorFilter<'a> {
|
||||||
|
/// `.fragments.<name>`: carries the token holding the fragment name.
Fragment(Token<'a>),
|
||||||
|
/// `.documentTemplate`
DocumentTemplate,
|
||||||
|
/// `.userProvided`
UserProvided,
|
||||||
|
/// `.regenerate`
Regenerate,
|
||||||
|
/// No suffix was written after the embedder name (or no embedder was given at all).
None,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum FilterCondition<'a> {
|
pub enum FilterCondition<'a> {
|
||||||
Not(Box<Self>),
|
Not(Box<Self>),
|
||||||
@ -143,6 +155,7 @@ pub enum FilterCondition<'a> {
|
|||||||
In { fid: Token<'a>, els: Vec<Token<'a>> },
|
In { fid: Token<'a>, els: Vec<Token<'a>> },
|
||||||
Or(Vec<Self>),
|
Or(Vec<Self>),
|
||||||
And(Vec<Self>),
|
And(Vec<Self>),
|
||||||
|
VectorExists { fid: Token<'a>, embedder: Option<Token<'a>>, filter: VectorFilter<'a> },
|
||||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||||
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
|
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
|
||||||
}
|
}
|
||||||
@ -173,9 +186,24 @@ impl<'a> FilterCondition<'a> {
|
|||||||
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
|
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
|
||||||
seq.iter().find_map(|filter| filter.use_contains_operator())
|
seq.iter().find_map(|filter| filter.use_contains_operator())
|
||||||
}
|
}
|
||||||
|
FilterCondition::VectorExists { .. }
|
||||||
|
| FilterCondition::GeoLowerThan { .. }
|
||||||
|
| FilterCondition::GeoBoundingBox { .. }
|
||||||
|
| FilterCondition::In { .. } => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the `fid` token of the first `VectorExists` condition found in this filter,
/// searching through `Not`, `Or` and `And` nodes, or `None` when there is none.
pub fn use_vector_filter(&self) -> Option<&Token> {
|
||||||
|
match self {
|
||||||
|
FilterCondition::Condition { .. } => None,
|
||||||
|
FilterCondition::Not(this) => this.use_vector_filter(),
|
||||||
|
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
|
||||||
|
seq.iter().find_map(|filter| filter.use_vector_filter())
|
||||||
|
}
|
||||||
FilterCondition::GeoLowerThan { .. }
|
FilterCondition::GeoLowerThan { .. }
|
||||||
| FilterCondition::GeoBoundingBox { .. }
|
| FilterCondition::GeoBoundingBox { .. }
|
||||||
| FilterCondition::In { .. } => None,
|
| FilterCondition::In { .. } => None,
|
||||||
|
FilterCondition::VectorExists { fid, .. } => Some(fid),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -263,10 +291,7 @@ fn parse_in_body(input: Span) -> IResult<Vec<Token>> {
|
|||||||
let (input, _) = ws(word_exact("IN"))(input)?;
|
let (input, _) = ws(word_exact("IN"))(input)?;
|
||||||
|
|
||||||
// everything after `IN` can be a failure
|
// everything after `IN` can be a failure
|
||||||
let (input, _) =
|
let (input, _) = tag("[")(input).map_cut(ErrorKind::InOpeningBracket)?;
|
||||||
cut_with_err(tag("["), |_| Error::new_from_kind(input, ErrorKind::InOpeningBracket))(
|
|
||||||
input,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let (input, content) = cut(parse_value_list)(input)?;
|
let (input, content) = cut(parse_value_list)(input)?;
|
||||||
|
|
||||||
@ -412,7 +437,7 @@ fn parse_geo_bounding_box(input: Span) -> IResult<FilterCondition> {
|
|||||||
let (input, args) = parsed?;
|
let (input, args) = parsed?;
|
||||||
|
|
||||||
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
|
if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 {
|
||||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox)));
|
return Err(Error::failure_from_kind(input, ErrorKind::GeoBoundingBox));
|
||||||
}
|
}
|
||||||
|
|
||||||
let res = FilterCondition::GeoBoundingBox {
|
let res = FilterCondition::GeoBoundingBox {
|
||||||
@ -433,7 +458,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
|||||||
))(input)
|
))(input)
|
||||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
||||||
// if we succeeded we still return a `Failure` because geoPoints are not allowed
|
// if we succeeded we still return a `Failure` because geoPoints are not allowed
|
||||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
|
Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
|
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
|
||||||
@ -447,7 +472,7 @@ fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
|
|||||||
))(input)
|
))(input)
|
||||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
|
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
|
||||||
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
|
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
|
||||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
|
Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geoDistance")))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
|
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
|
||||||
@ -461,7 +486,7 @@ fn parse_geo(input: Span) -> IResult<FilterCondition> {
|
|||||||
))(input)
|
))(input)
|
||||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
|
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
|
||||||
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed
|
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed
|
||||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
|
Err(Error::failure_from_kind(input, ErrorKind::ReservedGeo("_geo")))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
|
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
|
||||||
@ -500,8 +525,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
|
|||||||
parse_is_not_null,
|
parse_is_not_null,
|
||||||
parse_is_empty,
|
parse_is_empty,
|
||||||
parse_is_not_empty,
|
parse_is_not_empty,
|
||||||
parse_exists,
|
alt((parse_vectors_exists, parse_exists, parse_not_exists)),
|
||||||
parse_not_exists,
|
|
||||||
parse_to,
|
parse_to,
|
||||||
parse_contains,
|
parse_contains,
|
||||||
parse_not_contains,
|
parse_not_contains,
|
||||||
@ -557,6 +581,22 @@ impl std::fmt::Display for FilterCondition<'_> {
|
|||||||
}
|
}
|
||||||
write!(f, "]")
|
write!(f, "]")
|
||||||
}
|
}
|
||||||
|
FilterCondition::VectorExists { fid: _, embedder, filter: inner } => {
|
||||||
|
write!(f, "_vectors")?;
|
||||||
|
if let Some(embedder) = embedder {
|
||||||
|
write!(f, ".{:?}", embedder.value())?;
|
||||||
|
}
|
||||||
|
match inner {
|
||||||
|
VectorFilter::Fragment(fragment) => {
|
||||||
|
write!(f, ".fragments.{:?}", fragment.value())?
|
||||||
|
}
|
||||||
|
VectorFilter::DocumentTemplate => write!(f, ".documentTemplate")?,
|
||||||
|
VectorFilter::UserProvided => write!(f, ".userProvided")?,
|
||||||
|
VectorFilter::Regenerate => write!(f, ".regenerate")?,
|
||||||
|
VectorFilter::None => (),
|
||||||
|
}
|
||||||
|
write!(f, " EXISTS")
|
||||||
|
}
|
||||||
FilterCondition::GeoLowerThan { point, radius } => {
|
FilterCondition::GeoLowerThan { point, radius } => {
|
||||||
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius)
|
||||||
}
|
}
|
||||||
@ -630,6 +670,9 @@ pub mod tests {
|
|||||||
insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
|
insta::assert_snapshot!(p(r"title = 'foo\\\\\\\\'"), @r#"{title} = {foo\\\\}"#);
|
||||||
// but it also works with other sequences
|
// but it also works with other sequences
|
||||||
insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
insta::assert_snapshot!(p(r#"title = 'foo\x20\n\t\"\'"'"#), @"{title} = {foo \n\t\"\'\"}");
|
||||||
|
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors." valid.name ".fragments."also.. valid! " EXISTS"#), @r#"_vectors." valid.name ".fragments."also.. valid! " EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p("_vectors.\"\n\t\r\\\"\" EXISTS"), @r#"_vectors."\n\t\r\"" EXISTS"#);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -692,6 +735,18 @@ pub mod tests {
|
|||||||
insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
|
insta::assert_snapshot!(p("NOT subscribers IS NOT EMPTY"), @"{subscribers} IS EMPTY");
|
||||||
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
insta::assert_snapshot!(p("subscribers IS NOT EMPTY"), @"NOT ({subscribers} IS EMPTY)");
|
||||||
|
|
||||||
|
// Test _vectors EXISTS + _vectors NOT EXISTS
|
||||||
|
insta::assert_snapshot!(p("_vectors EXISTS"), @"_vectors EXISTS");
|
||||||
|
insta::assert_snapshot!(p("_vectors.embedderName EXISTS"), @r#"_vectors."embedderName" EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p("_vectors.embedderName.documentTemplate EXISTS"), @r#"_vectors."embedderName".documentTemplate EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p("_vectors.embedderName.regenerate EXISTS"), @r#"_vectors."embedderName".regenerate EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p("_vectors.embedderName.regenerate EXISTS"), @r#"_vectors."embedderName".regenerate EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p("_vectors.embedderName.fragments.fragmentName EXISTS"), @r#"_vectors."embedderName".fragments."fragmentName" EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p(" _vectors.embedderName.fragments.fragmentName EXISTS"), @r#"_vectors."embedderName".fragments."fragmentName" EXISTS"#);
|
||||||
|
insta::assert_snapshot!(p("NOT _vectors EXISTS"), @"NOT (_vectors EXISTS)");
|
||||||
|
insta::assert_snapshot!(p(" NOT _vectors EXISTS"), @"NOT (_vectors EXISTS)");
|
||||||
|
insta::assert_snapshot!(p(" _vectors NOT EXISTS"), @"NOT (_vectors EXISTS)");
|
||||||
|
|
||||||
// Test EXISTS + NOT EXISTS
|
// Test EXISTS + NOT EXISTS
|
||||||
insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
insta::assert_snapshot!(p("subscribers EXISTS"), @"{subscribers} EXISTS");
|
||||||
insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
insta::assert_snapshot!(p("NOT subscribers EXISTS"), @"NOT ({subscribers} EXISTS)");
|
||||||
@ -946,6 +1001,71 @@ pub mod tests {
|
|||||||
"###
|
"###
|
||||||
);
|
);
|
||||||
|
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors _vectors EXISTS"#), @r"
|
||||||
|
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
|
||||||
|
10:25 _vectors _vectors EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors. embedderName EXISTS"#), @r"
|
||||||
|
Was expecting embedder name but found nothing.
|
||||||
|
10:11 _vectors. embedderName EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors .embedderName EXISTS"#), @r"
|
||||||
|
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
|
||||||
|
10:30 _vectors .embedderName EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName. EXISTS"#), @r"
|
||||||
|
Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
|
||||||
|
22:23 _vectors.embedderName. EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors."embedderName EXISTS"#), @r#"
|
||||||
|
The quotes in one of the values are inconsistent.
|
||||||
|
10:30 _vectors."embedderName EXISTS
|
||||||
|
"#);
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors."embedderNam"e EXISTS"#), @r#"
|
||||||
|
The vector filter has leftover tokens.
|
||||||
|
23:31 _vectors."embedderNam"e EXISTS
|
||||||
|
"#);
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.documentTemplate. EXISTS"#), @r"
|
||||||
|
Was expecting one of `.fragments`, `.userProvided`, `.documentTemplate`, `.regenerate` or nothing, but instead found a point without a valid value.
|
||||||
|
39:40 _vectors.embedderName.documentTemplate. EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments EXISTS"#), @r"
|
||||||
|
The vector filter is missing a fragment name.
|
||||||
|
32:39 _vectors.embedderName.fragments EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments. EXISTS"#), @r"
|
||||||
|
The vector filter's fragment name is invalid.
|
||||||
|
33:40 _vectors.embedderName.fragments. EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments.test test EXISTS"#), @r"
|
||||||
|
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
|
||||||
|
38:49 _vectors.embedderName.fragments.test test EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.fragments. test EXISTS"#), @r"
|
||||||
|
The vector filter's fragment name is invalid.
|
||||||
|
33:45 _vectors.embedderName.fragments. test EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName .fragments. test EXISTS"#), @r"
|
||||||
|
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
|
||||||
|
23:46 _vectors.embedderName .fragments. test EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName .fragments.test EXISTS"#), @r"
|
||||||
|
Was expecting an operation like `EXISTS` or `NOT EXISTS` after the vector filter.
|
||||||
|
23:45 _vectors.embedderName .fragments.test EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.fargments.test EXISTS"#), @r"
|
||||||
|
Was expecting one of `fragments`, `userProvided`, `documentTemplate`, `regenerate` or nothing, but instead found `fargments`. Did you mean `fragments`?
|
||||||
|
23:32 _vectors.embedderName.fargments.test EXISTS
|
||||||
|
");
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName."userProvided" EXISTS"#), @r#"
|
||||||
|
Was expecting this part to be unquoted.
|
||||||
|
24:36 _vectors.embedderName."userProvided" EXISTS
|
||||||
|
"#);
|
||||||
|
insta::assert_snapshot!(p(r#"_vectors.embedderName.userProvided.fragments.test EXISTS"#), @r"
|
||||||
|
Vector filter can only accept one of `fragments`, `userProvided`, `documentTemplate` or `regenerate`, but found both `userProvided` and `fragments`.
|
||||||
|
36:45 _vectors.embedderName.userProvided.fragments.test EXISTS
|
||||||
|
");
|
||||||
|
|
||||||
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
|
insta::assert_snapshot!(p(r#"NOT OR EXISTS AND EXISTS NOT EXISTS"#), @r###"
|
||||||
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes.
|
||||||
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
5:7 NOT OR EXISTS AND EXISTS NOT EXISTS
|
||||||
|
@ -80,6 +80,51 @@ pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// vector_value = ( non_dot_word | singleQuoted | doubleQuoted)
|
||||||
|
pub fn parse_vector_value(input: Span) -> IResult<Token> {
|
||||||
|
pub fn non_dot_word(input: Span) -> IResult<Token> {
|
||||||
|
let (input, word) = take_while1(|c| is_value_component(c) && c != '.')(input)?;
|
||||||
|
Ok((input, word.into()))
|
||||||
|
}
|
||||||
|
|
||||||
|
let (input, value) = alt((
|
||||||
|
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
||||||
|
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
||||||
|
non_dot_word,
|
||||||
|
))(input)?;
|
||||||
|
|
||||||
|
match unescaper::unescape(value.value()) {
|
||||||
|
Ok(content) => {
|
||||||
|
if content.len() != value.value().len() {
|
||||||
|
Ok((input, Token::new(value.original_span(), Some(content))))
|
||||||
|
} else {
|
||||||
|
Ok((input, value))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(unescaper::Error::IncompleteStr(_)) => Err(nom::Err::Incomplete(nom::Needed::Unknown)),
|
||||||
|
Err(unescaper::Error::ParseIntError { .. }) => Err(nom::Err::Error(Error::new_from_kind(
|
||||||
|
value.original_span(),
|
||||||
|
ErrorKind::InvalidEscapedNumber,
|
||||||
|
))),
|
||||||
|
Err(unescaper::Error::InvalidChar { .. }) => Err(nom::Err::Error(Error::new_from_kind(
|
||||||
|
value.original_span(),
|
||||||
|
ErrorKind::MalformedValue,
|
||||||
|
))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_vector_value_cut<'a>(input: Span<'a>, kind: ErrorKind<'a>) -> IResult<'a, Token<'a>> {
|
||||||
|
parse_vector_value(input).map_err(|e| match e {
|
||||||
|
nom::Err::Failure(e) => match e.kind() {
|
||||||
|
ErrorKind::Char(c) if *c == '"' || *c == '\'' => {
|
||||||
|
crate::Error::failure_from_kind(input, ErrorKind::VectorFilterInvalidQuotes)
|
||||||
|
}
|
||||||
|
_ => crate::Error::failure_from_kind(input, kind),
|
||||||
|
},
|
||||||
|
_ => crate::Error::failure_from_kind(input, kind),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
||||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
pub fn parse_value(input: Span) -> IResult<Token> {
|
||||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||||
@ -99,31 +144,21 @@ pub fn parse_value(input: Span) -> IResult<Token> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
match parse_geo_radius(input) {
|
match parse_geo_radius(input) {
|
||||||
Ok(_) => {
|
Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius)),
|
||||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
|
|
||||||
}
|
|
||||||
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
||||||
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
|
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
|
||||||
Err(e) if e.is_failure() => {
|
Err(e) if e.is_failure() => {
|
||||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))
|
return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoRadius))
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
match parse_geo_bounding_box(input) {
|
match parse_geo_bounding_box(input) {
|
||||||
Ok(_) => {
|
Ok(_) => return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox)),
|
||||||
return Err(nom::Err::Failure(Error::new_from_kind(
|
|
||||||
input,
|
|
||||||
ErrorKind::MisusedGeoBoundingBox,
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
// if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
|
// if we encountered a failure it means the user badly wrote a _geoBoundingBox filter.
|
||||||
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
|
// But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value.
|
||||||
Err(e) if e.is_failure() => {
|
Err(e) if e.is_failure() => {
|
||||||
return Err(nom::Err::Failure(Error::new_from_kind(
|
return Err(Error::failure_from_kind(input, ErrorKind::MisusedGeoBoundingBox))
|
||||||
input,
|
|
||||||
ErrorKind::MisusedGeoBoundingBox,
|
|
||||||
)))
|
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
@ -736,7 +736,7 @@ fn test_document_addition_mixed_rights_with_index() {
|
|||||||
#[test]
|
#[test]
|
||||||
fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
|
fn test_document_addition_mixed_right_without_index_starts_with_cant_create() {
|
||||||
// We're going to autobatch multiple document addition.
|
// We're going to autobatch multiple document addition.
|
||||||
// - The index does not exists
|
// - The index does not exist
|
||||||
// - The first document addition don't have the right to create an index
|
// - The first document addition don't have the right to create an index
|
||||||
// - The second do. They should not batch together.
|
// - The second do. They should not batch together.
|
||||||
// - The second should batch with everything else as it's going to create an index.
|
// - The second should batch with everything else as it's going to create an index.
|
||||||
|
@ -139,6 +139,8 @@ pub struct DocumentsFetchAggregator<Method: AggregateMethod> {
|
|||||||
per_document_id: bool,
|
per_document_id: bool,
|
||||||
// if a filter was used
|
// if a filter was used
|
||||||
per_filter: bool,
|
per_filter: bool,
|
||||||
|
with_vector_filter: bool,
|
||||||
|
|
||||||
// if documents were sorted
|
// if documents were sorted
|
||||||
sort: bool,
|
sort: bool,
|
||||||
|
|
||||||
@ -166,6 +168,7 @@ impl<Method: AggregateMethod> Aggregate for DocumentsFetchAggregator<Method> {
|
|||||||
Box::new(Self {
|
Box::new(Self {
|
||||||
per_document_id: self.per_document_id | new.per_document_id,
|
per_document_id: self.per_document_id | new.per_document_id,
|
||||||
per_filter: self.per_filter | new.per_filter,
|
per_filter: self.per_filter | new.per_filter,
|
||||||
|
with_vector_filter: self.with_vector_filter | new.with_vector_filter,
|
||||||
sort: self.sort | new.sort,
|
sort: self.sort | new.sort,
|
||||||
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
|
retrieve_vectors: self.retrieve_vectors | new.retrieve_vectors,
|
||||||
max_limit: self.max_limit.max(new.max_limit),
|
max_limit: self.max_limit.max(new.max_limit),
|
||||||
@ -250,6 +253,7 @@ pub async fn get_document(
|
|||||||
retrieve_vectors: param_retrieve_vectors.0,
|
retrieve_vectors: param_retrieve_vectors.0,
|
||||||
per_document_id: true,
|
per_document_id: true,
|
||||||
per_filter: false,
|
per_filter: false,
|
||||||
|
with_vector_filter: false,
|
||||||
sort: false,
|
sort: false,
|
||||||
max_limit: 0,
|
max_limit: 0,
|
||||||
max_offset: 0,
|
max_offset: 0,
|
||||||
@ -475,6 +479,10 @@ pub async fn documents_by_query_post(
|
|||||||
analytics.publish(
|
analytics.publish(
|
||||||
DocumentsFetchAggregator::<DocumentsPOST> {
|
DocumentsFetchAggregator::<DocumentsPOST> {
|
||||||
per_filter: body.filter.is_some(),
|
per_filter: body.filter.is_some(),
|
||||||
|
with_vector_filter: body
|
||||||
|
.filter
|
||||||
|
.as_ref()
|
||||||
|
.is_some_and(|f| f.to_string().contains("_vectors")),
|
||||||
sort: body.sort.is_some(),
|
sort: body.sort.is_some(),
|
||||||
retrieve_vectors: body.retrieve_vectors,
|
retrieve_vectors: body.retrieve_vectors,
|
||||||
max_limit: body.limit,
|
max_limit: body.limit,
|
||||||
@ -576,6 +584,10 @@ pub async fn get_documents(
|
|||||||
analytics.publish(
|
analytics.publish(
|
||||||
DocumentsFetchAggregator::<DocumentsGET> {
|
DocumentsFetchAggregator::<DocumentsGET> {
|
||||||
per_filter: query.filter.is_some(),
|
per_filter: query.filter.is_some(),
|
||||||
|
with_vector_filter: query
|
||||||
|
.filter
|
||||||
|
.as_ref()
|
||||||
|
.is_some_and(|f| f.to_string().contains("_vectors")),
|
||||||
sort: query.sort.is_some(),
|
sort: query.sort.is_some(),
|
||||||
retrieve_vectors: query.retrieve_vectors,
|
retrieve_vectors: query.retrieve_vectors,
|
||||||
max_limit: query.limit,
|
max_limit: query.limit,
|
||||||
@ -1455,8 +1467,6 @@ fn some_documents<'a, 't: 'a>(
|
|||||||
document.remove("_vectors");
|
document.remove("_vectors");
|
||||||
}
|
}
|
||||||
RetrieveVectors::Retrieve => {
|
RetrieveVectors::Retrieve => {
|
||||||
// Clippy is simply wrong
|
|
||||||
#[allow(clippy::manual_unwrap_or_default)]
|
|
||||||
let mut vectors = match document.remove("_vectors") {
|
let mut vectors = match document.remove("_vectors") {
|
||||||
Some(Value::Object(map)) => map,
|
Some(Value::Object(map)) => map,
|
||||||
_ => Default::default(),
|
_ => Default::default(),
|
||||||
|
@ -40,6 +40,7 @@ pub struct SearchAggregator<Method: AggregateMethod> {
|
|||||||
// filter
|
// filter
|
||||||
filter_with_geo_radius: bool,
|
filter_with_geo_radius: bool,
|
||||||
filter_with_geo_bounding_box: bool,
|
filter_with_geo_bounding_box: bool,
|
||||||
|
filter_on_vectors: bool,
|
||||||
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
// every time a request has a filter, this field must be incremented by the number of terms it contains
|
||||||
filter_sum_of_criteria_terms: usize,
|
filter_sum_of_criteria_terms: usize,
|
||||||
// every time a request has a filter, this field must be incremented by one
|
// every time a request has a filter, this field must be incremented by one
|
||||||
@ -163,6 +164,7 @@ impl<Method: AggregateMethod> SearchAggregator<Method> {
|
|||||||
let stringified_filters = filter.to_string();
|
let stringified_filters = filter.to_string();
|
||||||
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius(");
|
||||||
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
ret.filter_with_geo_bounding_box = stringified_filters.contains("_geoBoundingBox(");
|
||||||
|
ret.filter_on_vectors = stringified_filters.contains("_vectors");
|
||||||
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -261,6 +263,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||||||
distinct,
|
distinct,
|
||||||
filter_with_geo_radius,
|
filter_with_geo_radius,
|
||||||
filter_with_geo_bounding_box,
|
filter_with_geo_bounding_box,
|
||||||
|
filter_on_vectors,
|
||||||
filter_sum_of_criteria_terms,
|
filter_sum_of_criteria_terms,
|
||||||
filter_total_number_of_criteria,
|
filter_total_number_of_criteria,
|
||||||
used_syntax,
|
used_syntax,
|
||||||
@ -315,6 +318,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||||||
// filter
|
// filter
|
||||||
self.filter_with_geo_radius |= filter_with_geo_radius;
|
self.filter_with_geo_radius |= filter_with_geo_radius;
|
||||||
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
|
||||||
|
self.filter_on_vectors |= filter_on_vectors;
|
||||||
self.filter_sum_of_criteria_terms =
|
self.filter_sum_of_criteria_terms =
|
||||||
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
self.filter_sum_of_criteria_terms.saturating_add(filter_sum_of_criteria_terms);
|
||||||
self.filter_total_number_of_criteria =
|
self.filter_total_number_of_criteria =
|
||||||
@ -389,6 +393,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||||||
distinct,
|
distinct,
|
||||||
filter_with_geo_radius,
|
filter_with_geo_radius,
|
||||||
filter_with_geo_bounding_box,
|
filter_with_geo_bounding_box,
|
||||||
|
filter_on_vectors,
|
||||||
filter_sum_of_criteria_terms,
|
filter_sum_of_criteria_terms,
|
||||||
filter_total_number_of_criteria,
|
filter_total_number_of_criteria,
|
||||||
used_syntax,
|
used_syntax,
|
||||||
@ -446,6 +451,7 @@ impl<Method: AggregateMethod> Aggregate for SearchAggregator<Method> {
|
|||||||
"filter": {
|
"filter": {
|
||||||
"with_geoRadius": filter_with_geo_radius,
|
"with_geoRadius": filter_with_geo_radius,
|
||||||
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
"with_geoBoundingBox": filter_with_geo_bounding_box,
|
||||||
|
"on_vectors": filter_on_vectors,
|
||||||
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
"avg_criteria_number": format!("{:.2}", filter_sum_of_criteria_terms as f64 / filter_total_number_of_criteria as f64),
|
||||||
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
"most_used_syntax": used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
|
||||||
},
|
},
|
||||||
|
@ -336,7 +336,7 @@ impl<Method: AggregateMethod + 'static> Aggregate for TaskFilterAnalytics<Method
|
|||||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||||
}
|
}
|
||||||
)),
|
)),
|
||||||
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
|
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
|
||||||
{
|
{
|
||||||
"message": "Task :taskUid not found.",
|
"message": "Task :taskUid not found.",
|
||||||
"code": "task_not_found",
|
"code": "task_not_found",
|
||||||
@ -430,7 +430,7 @@ async fn cancel_tasks(
|
|||||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||||
}
|
}
|
||||||
)),
|
)),
|
||||||
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
|
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
|
||||||
{
|
{
|
||||||
"message": "Task :taskUid not found.",
|
"message": "Task :taskUid not found.",
|
||||||
"code": "task_not_found",
|
"code": "task_not_found",
|
||||||
@ -611,7 +611,7 @@ async fn get_tasks(
|
|||||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||||
}
|
}
|
||||||
)),
|
)),
|
||||||
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
|
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
|
||||||
{
|
{
|
||||||
"message": "Task :taskUid not found.",
|
"message": "Task :taskUid not found.",
|
||||||
"code": "task_not_found",
|
"code": "task_not_found",
|
||||||
@ -665,7 +665,7 @@ async fn get_task(
|
|||||||
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
"link": "https://docs.meilisearch.com/errors#missing_authorization_header"
|
||||||
}
|
}
|
||||||
)),
|
)),
|
||||||
(status = 404, description = "The task uid does not exists", body = ResponseError, content_type = "application/json", example = json!(
|
(status = 404, description = "The task uid does not exist", body = ResponseError, content_type = "application/json", example = json!(
|
||||||
{
|
{
|
||||||
"message": "Task :taskUid not found.",
|
"message": "Task :taskUid not found.",
|
||||||
"code": "task_not_found",
|
"code": "task_not_found",
|
||||||
|
@ -2091,7 +2091,7 @@ pub(crate) fn parse_filter(
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
if let Some(ref filter) = filter {
|
if let Some(ref filter) = filter {
|
||||||
// If the contains operator is used while the contains filter features is not enabled, errors out
|
// If the contains operator is used while the contains filter feature is not enabled, errors out
|
||||||
if let Some((token, error)) =
|
if let Some((token, error)) =
|
||||||
filter.use_contains_operator().zip(features.check_contains_filter().err())
|
filter.use_contains_operator().zip(features.check_contains_filter().err())
|
||||||
{
|
{
|
||||||
@ -2102,6 +2102,18 @@ pub(crate) fn parse_filter(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(ref filter) = filter {
|
||||||
|
// If a vector filter is used while the multi modal feature is not enabled, errors out
|
||||||
|
if let Some((token, error)) =
|
||||||
|
filter.use_vector_filter().zip(features.check_multimodal("using a vector filter").err())
|
||||||
|
{
|
||||||
|
return Err(ResponseError::from_msg(
|
||||||
|
token.as_external_error(error).to_string(),
|
||||||
|
Code::FeatureNotEnabled,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Ok(filter)
|
Ok(filter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -557,7 +557,7 @@ async fn delete_document_by_filter() {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
let index = shared_does_not_exists_index().await;
|
let index = shared_does_not_exists_index().await;
|
||||||
// index does not exists
|
// index does not exist
|
||||||
let (response, _code) =
|
let (response, _code) =
|
||||||
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
|
index.delete_document_by_filter_fail(json!({ "filter": "doggo = bernese"}), server).await;
|
||||||
snapshot!(response, @r###"
|
snapshot!(response, @r###"
|
||||||
|
@ -304,7 +304,7 @@ async fn search_bad_filter() {
|
|||||||
let server = Server::new_shared();
|
let server = Server::new_shared();
|
||||||
let index = server.unique_index();
|
let index = server.unique_index();
|
||||||
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
|
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
|
||||||
// index does not exists error.
|
// index does not exist error.
|
||||||
let (response, _code) = index.create(None).await;
|
let (response, _code) = index.create(None).await;
|
||||||
server.wait_task(response.uid()).await.succeeded();
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
@ -1263,7 +1263,7 @@ async fn search_with_contains_without_enabling_the_feature() {
|
|||||||
let server = Server::new_shared();
|
let server = Server::new_shared();
|
||||||
let index = server.unique_index();
|
let index = server.unique_index();
|
||||||
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
|
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
|
||||||
// index does not exists error.
|
// index does not exist error.
|
||||||
let (task, _code) = index.create(None).await;
|
let (task, _code) = index.create(None).await;
|
||||||
server.wait_task(task.uid()).await.succeeded();
|
server.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
|
@ -4,8 +4,8 @@ use tempfile::TempDir;
|
|||||||
|
|
||||||
use super::test_settings_documents_indexing_swapping_and_search;
|
use super::test_settings_documents_indexing_swapping_and_search;
|
||||||
use crate::common::{
|
use crate::common::{
|
||||||
default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
|
default_settings, shared_index_for_fragments, shared_index_with_documents,
|
||||||
DOCUMENTS, NESTED_DOCUMENTS,
|
shared_index_with_nested_documents, Server, DOCUMENTS, NESTED_DOCUMENTS,
|
||||||
};
|
};
|
||||||
use crate::json;
|
use crate::json;
|
||||||
|
|
||||||
@ -731,3 +731,432 @@ async fn test_filterable_attributes_priority() {
|
|||||||
)
|
)
|
||||||
.await;
|
.await;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_all_embedders() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "kefir"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "echo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 4
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_missing_fragment() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.fragments EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "The vector filter is missing a fragment name.\n24:31 _vectors.rest.fragments EXISTS",
|
||||||
|
"code": "invalid_search_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_nonexistent_embedder() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.other EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
|
||||||
|
"code": "invalid_search_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_all_embedders_user_provided() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
// This one is counterintuitive, but it is the same as the previous one.
|
||||||
|
// It's because userProvided is interpreted as an embedder name
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.userProvided EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
|
||||||
|
"code": "invalid_search_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_specific_embedder() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "kefir"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "echo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 4
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_user_provided() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.userProvided EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "echo"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 1
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_specific_fragment() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.fragments.withBreed EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 2
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.fragments.basic EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "kefir"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 3
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_non_existant_fragment() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.fragments.withBred EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "Index `[uuid]`: The fragment `withBred` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`. Did you mean `withBreed`?\n25:33 _vectors.rest.fragments.withBred EXISTS",
|
||||||
|
"code": "invalid_search_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_document_template_but_fragments_used() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.documentTemplate EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 0
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_document_template() {
|
||||||
|
let (_mock, setting) = crate::vector::create_mock().await;
|
||||||
|
let server = crate::vector::get_server_vector().await;
|
||||||
|
let index = server.index("doggo");
|
||||||
|
|
||||||
|
let (_response, code) = server.set_features(json!({"multimodal": true})).await;
|
||||||
|
snapshot!(code, @"200 OK");
|
||||||
|
|
||||||
|
let (response, code) = index
|
||||||
|
.update_settings(json!({
|
||||||
|
"embedders": {
|
||||||
|
"rest": setting,
|
||||||
|
},
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let documents = json!([
|
||||||
|
{"id": 0, "name": "kefir"},
|
||||||
|
{"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
|
||||||
|
{"id": 2, "name": "intel"},
|
||||||
|
{"id": 3, "name": "iko" }
|
||||||
|
]);
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
server.wait_task(value.uid()).await.succeeded();
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.documentTemplate EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "kefir"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "iko"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 3
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_feature_gate() {
|
||||||
|
let index = shared_index_with_documents().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "using a vector filter requires enabling the `multimodal` experimental feature. See https://github.com/orgs/meilisearch/discussions/846\n1:9 _vectors EXISTS",
|
||||||
|
"code": "feature_not_enabled",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_negation() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.userProvided NOT EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "kefir"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 3
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_or_combination() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.fragments.withBreed EXISTS OR _vectors.rest.userProvided EXISTS",
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "echo"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 3
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[actix_rt::test]
|
||||||
|
async fn vector_filter_regenerate() {
|
||||||
|
let index = shared_index_for_fragments().await;
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": format!("_vectors.rest.regenerate EXISTS"),
|
||||||
|
"attributesToRetrieve": ["name"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"hits": [
|
||||||
|
{
|
||||||
|
"name": "kefir"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "intel"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "dustin"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"query": "",
|
||||||
|
"processingTimeMs": "[duration]",
|
||||||
|
"limit": 20,
|
||||||
|
"offset": 0,
|
||||||
|
"estimatedTotalHits": 3
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
@ -14,8 +14,9 @@ use meilisearch::option::MaxThreads;
|
|||||||
use crate::common::index::Index;
|
use crate::common::index::Index;
|
||||||
use crate::common::{default_settings, GetAllDocumentsOptions, Server};
|
use crate::common::{default_settings, GetAllDocumentsOptions, Server};
|
||||||
use crate::json;
|
use crate::json;
|
||||||
|
pub use rest::create_mock;
|
||||||
|
|
||||||
async fn get_server_vector() -> Server {
|
pub async fn get_server_vector() -> Server {
|
||||||
Server::new().await
|
Server::new().await
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ use crate::common::Value;
|
|||||||
use crate::json;
|
use crate::json;
|
||||||
use crate::vector::{get_server_vector, GetAllDocumentsOptions};
|
use crate::vector::{get_server_vector, GetAllDocumentsOptions};
|
||||||
|
|
||||||
async fn create_mock() -> (&'static MockServer, Value) {
|
pub async fn create_mock() -> (&'static MockServer, Value) {
|
||||||
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
let mock_server = Box::leak(Box::new(MockServer::start().await));
|
||||||
|
|
||||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||||
|
@ -639,3 +639,29 @@ fn conditionally_lookup_for_error_message() {
|
|||||||
assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
|
assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An optional spelling suggestion: wraps the suggested key, or `None` when
/// there is nothing to suggest.
pub struct DidYouMean<'a>(Option<&'a str>);
|
||||||
|
|
||||||
|
impl<'a> DidYouMean<'a> {
|
||||||
|
pub fn new(key: &str, keys: &'a [String]) -> DidYouMean<'a> {
|
||||||
|
let typos = levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true).build_dfa(key);
|
||||||
|
for key in keys.iter() {
|
||||||
|
match typos.eval(key) {
|
||||||
|
levenshtein_automata::Distance::Exact(_) => {
|
||||||
|
return DidYouMean(Some(key));
|
||||||
|
}
|
||||||
|
levenshtein_automata::Distance::AtLeast(_) => continue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
DidYouMean(None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for DidYouMean<'_> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
if let Some(suggestion) = self.0 {
|
||||||
|
write!(f, " Did you mean `{suggestion}`?")?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -111,7 +111,7 @@ impl FilterableAttributesFeatures {
|
|||||||
self.filter.is_filterable_null()
|
self.filter.is_filterable_null()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check if `IS EXISTS` is allowed
|
/// Check if `EXISTS` is allowed
|
||||||
pub fn is_filterable_exists(&self) -> bool {
|
pub fn is_filterable_exists(&self) -> bool {
|
||||||
self.filter.is_filterable_exists()
|
self.filter.is_filterable_exists()
|
||||||
}
|
}
|
||||||
|
@ -12,7 +12,7 @@ use roaring::{MultiOps, RoaringBitmap};
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::facet_range_search;
|
use super::facet_range_search;
|
||||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
@ -228,6 +228,10 @@ impl<'a> Filter<'a> {
|
|||||||
pub fn use_contains_operator(&self) -> Option<&Token> {
|
pub fn use_contains_operator(&self) -> Option<&Token> {
|
||||||
self.condition.use_contains_operator()
|
self.condition.use_contains_operator()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn use_vector_filter(&self) -> Option<&Token> {
|
||||||
|
self.condition.use_vector_filter()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Filter<'a> {
|
impl<'a> Filter<'a> {
|
||||||
@ -235,10 +239,12 @@ impl<'a> Filter<'a> {
|
|||||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
|
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
|
||||||
|
|
||||||
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
|
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
|
||||||
let attribute = fid.value();
|
let attribute = fid.value();
|
||||||
if matching_features(attribute, &filterable_attributes_rules)
|
if matching_features(attribute, &filterable_attributes_rules)
|
||||||
.is_some_and(|(_, features)| features.is_filterable())
|
.is_some_and(|(_, features)| features.is_filterable())
|
||||||
|
|| attribute == RESERVED_VECTORS_FIELD_NAME
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -578,7 +584,8 @@ impl<'a> Filter<'a> {
|
|||||||
.union()
|
.union()
|
||||||
}
|
}
|
||||||
FilterCondition::Condition { fid, op } => {
|
FilterCondition::Condition { fid, op } => {
|
||||||
let Some(field_id) = field_ids_map.id(fid.value()) else {
|
let value = fid.value();
|
||||||
|
let Some(field_id) = field_ids_map.id(value) else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Ok(RoaringBitmap::new());
|
||||||
};
|
};
|
||||||
let Some((rule_index, features)) =
|
let Some((rule_index, features)) =
|
||||||
@ -635,6 +642,9 @@ impl<'a> Filter<'a> {
|
|||||||
Ok(RoaringBitmap::new())
|
Ok(RoaringBitmap::new())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
FilterCondition::VectorExists { fid: _, embedder, filter } => {
|
||||||
|
super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
|
||||||
|
}
|
||||||
FilterCondition::GeoLowerThan { point, radius } => {
|
FilterCondition::GeoLowerThan { point, radius } => {
|
||||||
if index.is_geo_filtering_enabled(rtxn)? {
|
if index.is_geo_filtering_enabled(rtxn)? {
|
||||||
let base_point: [f64; 2] =
|
let base_point: [f64; 2] =
|
||||||
|
157
crates/milli/src/search/facet/filter_vector.rs
Normal file
157
crates/milli/src/search/facet/filter_vector.rs
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
use filter_parser::{Token, VectorFilter};
|
||||||
|
use roaring::{MultiOps, RoaringBitmap};
|
||||||
|
|
||||||
|
use crate::error::{DidYouMean, Error};
|
||||||
|
use crate::vector::db::IndexEmbeddingConfig;
|
||||||
|
use crate::vector::{ArroyStats, ArroyWrapper};
|
||||||
|
use crate::Index;
|
||||||
|
|
||||||
|
#[derive(Debug, thiserror::Error)]
|
||||||
|
pub enum VectorFilterError<'a> {
|
||||||
|
#[error("The embedder `{}` does not exist. {}", embedder.value(), {
|
||||||
|
if available.is_empty() {
|
||||||
|
String::from("This index does not have any configured embedders.")
|
||||||
|
} else {
|
||||||
|
let mut available = available.clone();
|
||||||
|
available.sort_unstable();
|
||||||
|
let did_you_mean = DidYouMean::new(embedder.value(), &available);
|
||||||
|
format!("Available embedders are: {}.{did_you_mean}", available.iter().map(|e| format!("`{e}`")).collect::<Vec<_>>().join(", "))
|
||||||
|
}
|
||||||
|
})]
|
||||||
|
EmbedderDoesNotExist { embedder: &'a Token<'a>, available: Vec<String> },
|
||||||
|
|
||||||
|
#[error("The fragment `{}` does not exist on embedder `{}`. {}", fragment.value(), embedder.value(), {
|
||||||
|
if available.is_empty() {
|
||||||
|
String::from("This embedder does not have any configured fragments.")
|
||||||
|
} else {
|
||||||
|
let mut available = available.clone();
|
||||||
|
available.sort_unstable();
|
||||||
|
let did_you_mean = DidYouMean::new(fragment.value(), &available);
|
||||||
|
format!("Available fragments on this embedder are: {}.{did_you_mean}", available.iter().map(|f| format!("`{f}`")).collect::<Vec<_>>().join(", "))
|
||||||
|
}
|
||||||
|
})]
|
||||||
|
FragmentDoesNotExist {
|
||||||
|
embedder: &'a Token<'a>,
|
||||||
|
fragment: &'a Token<'a>,
|
||||||
|
available: Vec<String>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
use VectorFilterError::*;
|
||||||
|
|
||||||
|
impl<'a> From<VectorFilterError<'a>> for Error {
|
||||||
|
fn from(err: VectorFilterError<'a>) -> Self {
|
||||||
|
match &err {
|
||||||
|
EmbedderDoesNotExist { embedder: token, .. }
|
||||||
|
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(super) fn evaluate(
|
||||||
|
rtxn: &heed::RoTxn<'_>,
|
||||||
|
index: &Index,
|
||||||
|
universe: Option<&RoaringBitmap>,
|
||||||
|
embedder: Option<Token<'_>>,
|
||||||
|
filter: &VectorFilter<'_>,
|
||||||
|
) -> crate::Result<RoaringBitmap> {
|
||||||
|
let index_embedding_configs = index.embedding_configs();
|
||||||
|
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||||
|
|
||||||
|
let embedders = match embedder {
|
||||||
|
Some(embedder) => vec![embedder],
|
||||||
|
None => embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect(),
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut docids = embedders
|
||||||
|
.iter()
|
||||||
|
.map(|e| evaluate_inner(rtxn, index, e, &embedding_configs, filter))
|
||||||
|
.union()?;
|
||||||
|
|
||||||
|
if let Some(universe) = universe {
|
||||||
|
docids &= universe;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(docids)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn evaluate_inner(
|
||||||
|
rtxn: &heed::RoTxn<'_>,
|
||||||
|
index: &Index,
|
||||||
|
embedder: &Token<'_>,
|
||||||
|
embedding_configs: &[IndexEmbeddingConfig],
|
||||||
|
filter: &VectorFilter<'_>,
|
||||||
|
) -> crate::Result<RoaringBitmap> {
|
||||||
|
let embedder_name = embedder.value();
|
||||||
|
let available_embedders =
|
||||||
|
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||||
|
|
||||||
|
let embedding_config = embedding_configs
|
||||||
|
.iter()
|
||||||
|
.find(|config| config.name == embedder_name)
|
||||||
|
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||||
|
|
||||||
|
let embedder_info = index
|
||||||
|
.embedding_configs()
|
||||||
|
.embedder_info(rtxn, embedder_name)?
|
||||||
|
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||||
|
|
||||||
|
let arroy_wrapper = ArroyWrapper::new(
|
||||||
|
index.vector_arroy,
|
||||||
|
embedder_info.embedder_id,
|
||||||
|
embedding_config.config.quantized(),
|
||||||
|
);
|
||||||
|
|
||||||
|
let docids = match filter {
|
||||||
|
VectorFilter::Fragment(fragment) => {
|
||||||
|
let fragment_name = fragment.value();
|
||||||
|
let fragment_config = embedding_config
|
||||||
|
.fragments
|
||||||
|
.as_slice()
|
||||||
|
.iter()
|
||||||
|
.find(|fragment| fragment.name == fragment_name)
|
||||||
|
.ok_or_else(|| FragmentDoesNotExist {
|
||||||
|
embedder,
|
||||||
|
fragment,
|
||||||
|
available: embedding_config
|
||||||
|
.fragments
|
||||||
|
.as_slice()
|
||||||
|
.iter()
|
||||||
|
.map(|f| f.name.clone())
|
||||||
|
.collect(),
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
|
||||||
|
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| {
|
||||||
|
bitmap.clone() - user_provided_docids
|
||||||
|
})?
|
||||||
|
}
|
||||||
|
VectorFilter::DocumentTemplate => {
|
||||||
|
if !embedding_config.fragments.as_slice().is_empty() {
|
||||||
|
return Ok(RoaringBitmap::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
stats.documents - user_provided_docids.clone()
|
||||||
|
}
|
||||||
|
VectorFilter::UserProvided => {
|
||||||
|
let user_provided_docids = embedder_info.embedding_status.user_provided_docids();
|
||||||
|
user_provided_docids.clone()
|
||||||
|
}
|
||||||
|
VectorFilter::Regenerate => {
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||||
|
stats.documents - skip_regenerate
|
||||||
|
}
|
||||||
|
VectorFilter::None => {
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
stats.documents
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(docids)
|
||||||
|
}
|
@ -17,6 +17,7 @@ mod facet_range_search;
|
|||||||
mod facet_sort_ascending;
|
mod facet_sort_ascending;
|
||||||
mod facet_sort_descending;
|
mod facet_sort_descending;
|
||||||
mod filter;
|
mod filter;
|
||||||
|
mod filter_vector;
|
||||||
mod search;
|
mod search;
|
||||||
|
|
||||||
fn facet_extreme_value<'t>(
|
fn facet_extreme_value<'t>(
|
||||||
|
@ -1339,9 +1339,8 @@ fn vectors_are_never_indexed_as_searchable_or_filterable() {
|
|||||||
assert!(results.candidates.is_empty());
|
assert!(results.candidates.is_empty());
|
||||||
|
|
||||||
let mut search = index.search(&rtxn);
|
let mut search = index.search(&rtxn);
|
||||||
let results = search
|
let results =
|
||||||
.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap())
|
dbg!(search.filter(Filter::from_str("_vectors.doggo = 6789").unwrap().unwrap()).execute())
|
||||||
.execute()
|
|
||||||
.unwrap();
|
.unwrap();
|
||||||
assert!(results.candidates.is_empty());
|
assert!(results.candidates.is_empty());
|
||||||
|
|
||||||
|
@ -128,6 +128,7 @@ impl EmbeddingStatus {
|
|||||||
pub fn is_user_provided(&self, docid: DocumentId) -> bool {
|
pub fn is_user_provided(&self, docid: DocumentId) -> bool {
|
||||||
self.user_provided.contains(docid)
|
self.user_provided.contains(docid)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether vectors should be regenerated for that document and that embedder.
|
/// Whether vectors should be regenerated for that document and that embedder.
|
||||||
pub fn must_regenerate(&self, docid: DocumentId) -> bool {
|
pub fn must_regenerate(&self, docid: DocumentId) -> bool {
|
||||||
let invert = self.skip_regenerate_different_from_user_provided.contains(docid);
|
let invert = self.skip_regenerate_different_from_user_provided.contains(docid);
|
||||||
|
@ -556,9 +556,6 @@ impl ArroyWrapper {
|
|||||||
for reader in self.readers(rtxn, self.quantized_db()) {
|
for reader in self.readers(rtxn, self.quantized_db()) {
|
||||||
let reader = reader?;
|
let reader = reader?;
|
||||||
let documents = reader.item_ids();
|
let documents = reader.item_ids();
|
||||||
if documents.is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
stats.documents |= documents;
|
stats.documents |= documents;
|
||||||
stats.number_of_embeddings += documents.len();
|
stats.number_of_embeddings += documents.len();
|
||||||
}
|
}
|
||||||
@ -566,9 +563,6 @@ impl ArroyWrapper {
|
|||||||
for reader in self.readers(rtxn, self.angular_db()) {
|
for reader in self.readers(rtxn, self.angular_db()) {
|
||||||
let reader = reader?;
|
let reader = reader?;
|
||||||
let documents = reader.item_ids();
|
let documents = reader.item_ids();
|
||||||
if documents.is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
stats.documents |= documents;
|
stats.documents |= documents;
|
||||||
stats.number_of_embeddings += documents.len();
|
stats.number_of_embeddings += documents.len();
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user