Improve error handling

This commit is contained in:
Mubelotix
2025-07-08 11:39:10 +02:00
parent 2d45124d9b
commit 0301d8f239
4 changed files with 342 additions and 87 deletions

View File

@ -60,7 +60,7 @@ use nom::combinator::{cut, eof, map, opt};
use nom::multi::{many0, separated_list1}; use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float; use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, terminated, tuple}; use nom::sequence::{delimited, preceded, terminated, tuple};
use nom::Finish; use nom::{Finish, Slice};
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
pub(crate) use value::parse_value; pub(crate) use value::parse_value;
use value::word_exact; use value::word_exact;
@ -121,6 +121,16 @@ impl<'a> Token<'a> {
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat)) Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
} }
} }
/// Split the token by a delimiter and return an iterator of tokens.
/// Each token in the iterator will have its own span that corresponds to a slice of the original token's span.
pub fn split(&self, delimiter: &'a str) -> impl Iterator<Item = Token<'a>> + '_ {
let original_addr = self.value().as_ptr() as usize;
self.value().split(delimiter).map(move |part| {
let offset = part.as_ptr() as usize - original_addr;
Token::new(self.span.slice(offset..offset + part.len()), Some(part.to_string()))
})
}
} }
impl<'a> From<Span<'a>> for Token<'a> { impl<'a> From<Span<'a>> for Token<'a> {
@ -604,6 +614,8 @@ impl std::fmt::Display for Token<'_> {
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {
use std::fmt::format;
use FilterCondition as Fc; use FilterCondition as Fc;
use super::*; use super::*;
@ -1043,4 +1055,103 @@ pub mod tests {
let token: Token = s.into(); let token: Token = s.into();
assert_eq!(token.value(), s); assert_eq!(token.value(), s);
} }
#[test]
fn split() {
let s = "test string that should not be parsed\n newline";
let token: Token = s.into();
let parts: Vec<_> = token.split(" ").collect();
insta::assert_snapshot!(format!("{parts:#?}"), @r#"
[
Token {
span: LocatedSpan {
offset: 0,
line: 1,
fragment: "test",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"test",
),
},
Token {
span: LocatedSpan {
offset: 5,
line: 1,
fragment: "string",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"string",
),
},
Token {
span: LocatedSpan {
offset: 12,
line: 1,
fragment: "that",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"that",
),
},
Token {
span: LocatedSpan {
offset: 17,
line: 1,
fragment: "should",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"should",
),
},
Token {
span: LocatedSpan {
offset: 24,
line: 1,
fragment: "not",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"not",
),
},
Token {
span: LocatedSpan {
offset: 28,
line: 1,
fragment: "be",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"be",
),
},
Token {
span: LocatedSpan {
offset: 31,
line: 1,
fragment: "parsed\n",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"parsed\n",
),
},
Token {
span: LocatedSpan {
offset: 39,
line: 2,
fragment: "newline",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"newline",
),
},
]
"#);
}
} }

View File

@ -736,10 +736,12 @@ async fn test_filterable_attributes_priority() {
async fn test_vector_filter() { async fn test_vector_filter() {
let index = crate::vector::shared_index_for_fragments().await; let index = crate::vector::shared_index_for_fragments().await;
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [ "hits": [
@ -764,42 +766,44 @@ async fn test_vector_filter() {
} }
"#); "#);
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors.other EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors.other EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [], "message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
"query": "", "code": "invalid_search_filter",
"processingTimeMs": "[duration]", "type": "invalid_request",
"limit": 20, "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
"offset": 0,
"estimatedTotalHits": 0
} }
"#); "#);
// This one is counterintuitive, but it is the same as the previous one. // This one is counterintuitive, but it is the same as the previous one.
// It's because userProvided is interpreted as an embedder name // It's because userProvided is interpreted as an embedder name
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors.userProvided EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors.userProvided EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [], "message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
"query": "", "code": "invalid_search_filter",
"processingTimeMs": "[duration]", "type": "invalid_request",
"limit": 20, "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
"offset": 0,
"estimatedTotalHits": 0
} }
"#); "#);
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors.rest EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors.rest EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [ "hits": [
@ -824,10 +828,12 @@ async fn test_vector_filter() {
} }
"#); "#);
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors.rest.userProvided EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors.rest.userProvided EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [ "hits": [
@ -843,10 +849,12 @@ async fn test_vector_filter() {
} }
"#); "#);
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors.rest.fragments.withBreed EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors.rest.fragments.withBreed EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [ "hits": [
@ -864,11 +872,13 @@ async fn test_vector_filter() {
"estimatedTotalHits": 2 "estimatedTotalHits": 2
} }
"#); "#);
let (value, _code) = index.search_post(json!({ let (value, _code) = index
"filter": "_vectors.rest.fragments.basic EXISTS", .search_post(json!({
"attributesToRetrieve": ["id"] "filter": "_vectors.rest.fragments.basic EXISTS",
})).await; "attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#" snapshot!(value, @r#"
{ {
"hits": [ "hits": [
@ -892,4 +902,19 @@ async fn test_vector_filter() {
"estimatedTotalHits": 4 "estimatedTotalHits": 4
} }
"#); "#);
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.other EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: The fragment `other` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`.\n25:30 _vectors.rest.fragments.other EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
} }

View File

@ -241,7 +241,8 @@ impl<'a> Filter<'a> {
let attribute = fid.value(); let attribute = fid.value();
if matching_features(attribute, &filterable_attributes_rules) if matching_features(attribute, &filterable_attributes_rules)
.is_some_and(|(_, features)| features.is_filterable()) || VectorFilter::matches(attribute) .is_some_and(|(_, features)| features.is_filterable())
|| VectorFilter::matches(attribute)
{ {
continue; continue;
} }
@ -548,11 +549,11 @@ impl<'a> Filter<'a> {
let value = fid.value(); let value = fid.value();
if VectorFilter::matches(value) { if VectorFilter::matches(value) {
if !matches!(op, Condition::Exists) { if !matches!(op, Condition::Exists) {
return Err(Error::UserError(UserError::InvalidFilter( return Err(Error::UserError(UserError::InvalidFilter(String::from(
String::from("Vector filter can only be used with the `exists` operator"), "Vector filter can only be used with the `exists` operator",
))); ))));
} }
let vector_filter = VectorFilter::parse(value)?; let vector_filter = VectorFilter::parse(fid)?;
return vector_filter.evaluate(rtxn, index, universe); return vector_filter.evaluate(rtxn, index, universe);
} }

View File

@ -1,16 +1,118 @@
use filter_parser::Token;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::error::{Error, UserError}; use crate::error::{Error, UserError};
use crate::vector::{ArroyStats, ArroyWrapper}; use crate::vector::{ArroyStats, ArroyWrapper};
use crate::{Index, Result}; use crate::Index;
pub(super) struct VectorFilter<'a> { pub(super) struct VectorFilter<'a> {
embedder_name: Option<&'a str>, embedder_token: Option<Token<'a>>,
fragment_name: Option<&'a str>, fragment_token: Option<Token<'a>>,
user_provided: bool, user_provided: bool,
// TODO: not_user_provided: bool, // TODO: not_user_provided: bool,
} }
#[derive(Debug)]
pub enum VectorFilterError<'a> {
EmptyFilter,
InvalidPrefix(Token<'a>),
MissingFragmentName(Token<'a>),
UserProvidedWithFragment(Token<'a>),
LeftoverToken(Token<'a>),
EmbedderDoesNotExist {
embedder: &'a Token<'a>,
available: Vec<String>,
},
FragmentDoesNotExist {
embedder: &'a Token<'a>,
fragment: &'a Token<'a>,
available: Vec<String>,
},
}
use VectorFilterError::*;
impl std::error::Error for VectorFilterError<'_> {}
impl std::fmt::Display for VectorFilterError<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
EmptyFilter => {
write!(f, "Vector filter cannot be empty.")
}
InvalidPrefix(prefix) => {
write!(
f,
"Vector filter must start with `_vectors` but found `{}`.",
prefix.value()
)
}
MissingFragmentName(_token) => {
write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")
}
UserProvidedWithFragment(_token) => {
write!(f, "Vector filter cannot specify both a fragment name and userProvided.")
}
LeftoverToken(token) => {
write!(f, "Vector filter has leftover token: `{}`.", token.value())
}
EmbedderDoesNotExist { embedder, available } => {
write!(f, "The embedder `{}` does not exist.", embedder.value())?;
if available.is_empty() {
write!(f, " This index does not have configured embedders.")
} else {
write!(f, " Available embedders are: ")?;
let mut available = available.clone();
available.sort_unstable();
for (idx, embedder) in available.iter().enumerate() {
write!(f, "`{embedder}`")?;
if idx != available.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, ".")
}
}
FragmentDoesNotExist { embedder, fragment, available } => {
write!(
f,
"The fragment `{}` does not exist on embedder `{}`.",
fragment.value(),
embedder.value(),
)?;
if available.is_empty() {
write!(f, " This embedder does not have configured fragments.")
} else {
write!(f, " Available fragments on this embedder are: ")?;
let mut available = available.clone();
available.sort_unstable();
for (idx, fragment) in available.iter().enumerate() {
write!(f, "`{fragment}`")?;
if idx != available.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, ".")
}
}
}
}
}
impl<'a> From<VectorFilterError<'a>> for Error {
fn from(err: VectorFilterError<'a>) -> Self {
match &err {
EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())),
InvalidPrefix(token)
| MissingFragmentName(token)
| UserProvidedWithFragment(token)
| LeftoverToken(token) => token.clone().as_external_error(err).into(),
EmbedderDoesNotExist { embedder: token, .. }
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
}
}
}
impl<'a> VectorFilter<'a> { impl<'a> VectorFilter<'a> {
pub(super) fn matches(value: &str) -> bool { pub(super) fn matches(value: &str) -> bool {
value.starts_with("_vectors.") || value == "_vectors" value.starts_with("_vectors.") || value == "_vectors"
@ -23,71 +125,74 @@ impl<'a> VectorFilter<'a> {
/// - `_vectors.{embedder_name}` /// - `_vectors.{embedder_name}`
/// - `_vectors.{embedder_name}.userProvided` /// - `_vectors.{embedder_name}.userProvided`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}` /// - `_vectors.{embedder_name}.fragments.{fragment_name}`
pub(super) fn parse(s: &'a str) -> Result<Self> { pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
let mut split = s.split('.').peekable(); let mut split = s.split(".").peekable();
if split.next() != Some("_vectors") { match split.next() {
return Err(Error::UserError(UserError::InvalidFilter(String::from( Some(token) if token.value() == "_vectors" => (),
"Vector filter must start with '_vectors'", Some(token) => return Err(InvalidPrefix(token)),
)))); None => return Err(EmptyFilter),
} }
let embedder_name = split.next(); let embedder_name = split.next();
let mut fragment_name = None; let mut fragment_name = None;
if split.peek() == Some(&"fragments") { if split.peek().map(|t| t.value()) == Some("fragments") {
split.next(); let token = split.next().expect("it was peeked before");
fragment_name = Some(split.next().ok_or_else(|| { fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
Error::UserError(UserError::InvalidFilter(
String::from("Vector filter is inconsistent: either specify a fragment name or remove the 'fragments' part"),
))
})?);
} }
let mut user_provided = false; let mut user_provided_token = None;
if split.peek() == Some(&"userProvided") || split.peek() == Some(&"user_provided") { if split.peek().map(|t| t.value()) == Some("userProvided")
split.next(); || split.peek().map(|t| t.value()) == Some("user_provided")
user_provided = true; {
user_provided_token = split.next();
} }
if fragment_name.is_some() && user_provided { if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) {
return Err(Error::UserError(UserError::InvalidFilter( return Err(UserProvidedWithFragment(user_provided_token.clone()))?;
String::from("Vector filter cannot specify both a fragment name and userProvided"),
)));
} }
if let Some(next) = split.next() { if let Some(next) = split.next() {
return Err(Error::UserError(UserError::InvalidFilter(format!( return Err(LeftoverToken(next))?;
"Unexpected part in vector filter: '{next}'"
))));
} }
Ok(Self { embedder_name, fragment_name, user_provided }) Ok(Self {
embedder_token: embedder_name,
fragment_token: fragment_name,
user_provided: user_provided_token.is_some(),
})
} }
pub(super) fn evaluate( pub(super) fn evaluate(
&self, self,
rtxn: &heed::RoTxn<'_>, rtxn: &heed::RoTxn<'_>,
index: &Index, index: &Index,
universe: Option<&RoaringBitmap>, universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> { ) -> crate::Result<RoaringBitmap> {
let index_embedding_configs = index.embedding_configs(); let index_embedding_configs = index.embedding_configs();
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?; let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
let mut embedders = Vec::new(); let mut embedders = Vec::new();
if let Some(embedder_name) = self.embedder_name { if let Some(embedder_token) = &self.embedder_token {
let embedder_name = embedder_token.value();
let Some(embedder_config) = let Some(embedder_config) =
embedding_configs.iter().find(|config| config.name == embedder_name) embedding_configs.iter().find(|config| config.name == embedder_name)
else { else {
return Ok(RoaringBitmap::new()); return Err(EmbedderDoesNotExist {
embedder: embedder_token,
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
})?;
}; };
let Some(embedder_info) = let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)?
index_embedding_configs.embedder_info(rtxn, embedder_name)?
else { else {
return Ok(RoaringBitmap::new()); return Err(EmbedderDoesNotExist {
embedder: embedder_token,
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
})?;
}; };
embedders.push((embedder_config, embedder_info)); embedders.push((embedder_config, embedder_info));
} else { } else {
for embedder_config in embedding_configs.iter() { for embedder_config in embedding_configs.iter() {
@ -99,7 +204,7 @@ impl<'a> VectorFilter<'a> {
embedders.push((embedder_config, embedder_info)); embedders.push((embedder_config, embedder_info));
} }
}; };
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (embedder_config, embedder_info) in embedders { for (embedder_config, embedder_info) in embedders {
let arroy_wrapper = ArroyWrapper::new( let arroy_wrapper = ArroyWrapper::new(
@ -108,14 +213,27 @@ impl<'a> VectorFilter<'a> {
embedder_config.config.quantized(), embedder_config.config.quantized(),
); );
let mut new_docids = if let Some(fragment_name) = self.fragment_name { let mut new_docids = if let Some(fragment_token) = &self.fragment_token {
let fragment_name = fragment_token.value();
let Some(fragment_config) = embedder_config let Some(fragment_config) = embedder_config
.fragments .fragments
.as_slice() .as_slice()
.iter() .iter()
.find(|fragment| fragment.name == fragment_name) .find(|fragment| fragment.name == fragment_name)
else { else {
return Ok(RoaringBitmap::new()); return Err(FragmentDoesNotExist {
embedder: self
.embedder_token
.as_ref()
.expect("there can't be a fragment without an embedder"),
fragment: fragment_token,
available: embedder_config
.fragments
.as_slice()
.iter()
.map(|f| f.name.clone())
.collect(),
})?;
}; };
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())? arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?