Improve error handling

This commit is contained in:
Mubelotix
2025-07-08 11:39:10 +02:00
parent 2d45124d9b
commit 0301d8f239
4 changed files with 342 additions and 87 deletions

View File

@ -60,7 +60,7 @@ use nom::combinator::{cut, eof, map, opt};
use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, terminated, tuple};
use nom::Finish;
use nom::{Finish, Slice};
use nom_locate::LocatedSpan;
pub(crate) use value::parse_value;
use value::word_exact;
@ -121,6 +121,16 @@ impl<'a> Token<'a> {
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
}
}
/// Split the token by a delimiter and return an iterator of tokens.
/// Each token in the iterator will have its own span that corresponds to a slice of the original token's span.
pub fn split(&self, delimiter: &'a str) -> impl Iterator<Item = Token<'a>> + '_ {
let original_addr = self.value().as_ptr() as usize;
self.value().split(delimiter).map(move |part| {
let offset = part.as_ptr() as usize - original_addr;
Token::new(self.span.slice(offset..offset + part.len()), Some(part.to_string()))
})
}
}
impl<'a> From<Span<'a>> for Token<'a> {
@ -604,6 +614,8 @@ impl std::fmt::Display for Token<'_> {
#[cfg(test)]
pub mod tests {
use std::fmt::format;
use FilterCondition as Fc;
use super::*;
@ -1043,4 +1055,103 @@ pub mod tests {
let token: Token = s.into();
assert_eq!(token.value(), s);
}
#[test]
fn split() {
let s = "test string that should not be parsed\n newline";
let token: Token = s.into();
let parts: Vec<_> = token.split(" ").collect();
insta::assert_snapshot!(format!("{parts:#?}"), @r#"
[
Token {
span: LocatedSpan {
offset: 0,
line: 1,
fragment: "test",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"test",
),
},
Token {
span: LocatedSpan {
offset: 5,
line: 1,
fragment: "string",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"string",
),
},
Token {
span: LocatedSpan {
offset: 12,
line: 1,
fragment: "that",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"that",
),
},
Token {
span: LocatedSpan {
offset: 17,
line: 1,
fragment: "should",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"should",
),
},
Token {
span: LocatedSpan {
offset: 24,
line: 1,
fragment: "not",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"not",
),
},
Token {
span: LocatedSpan {
offset: 28,
line: 1,
fragment: "be",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"be",
),
},
Token {
span: LocatedSpan {
offset: 31,
line: 1,
fragment: "parsed\n",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"parsed\n",
),
},
Token {
span: LocatedSpan {
offset: 39,
line: 2,
fragment: "newline",
extra: "test string that should not be parsed\n newline",
},
value: Some(
"newline",
),
},
]
"#);
}
}

View File

@ -736,10 +736,12 @@ async fn test_filterable_attributes_priority() {
async fn test_vector_filter() {
let index = crate::vector::shared_index_for_fragments().await;
let (value, _code) = index.search_post(json!({
"filter": "_vectors EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
@ -764,42 +766,44 @@ async fn test_vector_filter() {
}
"#);
let (value, _code) = index.search_post(json!({
"filter": "_vectors.other EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.other EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0
"message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
// This one is counterintuitive, but it is the same as the previous one.
// It's because userProvided is interpreted as an embedder name
let (value, _code) = index.search_post(json!({
"filter": "_vectors.userProvided EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.userProvided EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0
"message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
let (value, _code) = index.search_post(json!({
"filter": "_vectors.rest EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
@ -824,10 +828,12 @@ async fn test_vector_filter() {
}
"#);
let (value, _code) = index.search_post(json!({
"filter": "_vectors.rest.userProvided EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.userProvided EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
@ -843,10 +849,12 @@ async fn test_vector_filter() {
}
"#);
let (value, _code) = index.search_post(json!({
"filter": "_vectors.rest.fragments.withBreed EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.withBreed EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
@ -865,10 +873,12 @@ async fn test_vector_filter() {
}
"#);
let (value, _code) = index.search_post(json!({
"filter": "_vectors.rest.fragments.basic EXISTS",
"attributesToRetrieve": ["id"]
})).await;
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.basic EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"hits": [
@ -892,4 +902,19 @@ async fn test_vector_filter() {
"estimatedTotalHits": 4
}
"#);
let (value, _code) = index
.search_post(json!({
"filter": "_vectors.rest.fragments.other EXISTS",
"attributesToRetrieve": ["id"]
}))
.await;
snapshot!(value, @r#"
{
"message": "Index `[uuid]`: The fragment `other` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`.\n25:30 _vectors.rest.fragments.other EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"#);
}

View File

@ -241,7 +241,8 @@ impl<'a> Filter<'a> {
let attribute = fid.value();
if matching_features(attribute, &filterable_attributes_rules)
.is_some_and(|(_, features)| features.is_filterable()) || VectorFilter::matches(attribute)
.is_some_and(|(_, features)| features.is_filterable())
|| VectorFilter::matches(attribute)
{
continue;
}
@ -548,11 +549,11 @@ impl<'a> Filter<'a> {
let value = fid.value();
if VectorFilter::matches(value) {
if !matches!(op, Condition::Exists) {
return Err(Error::UserError(UserError::InvalidFilter(
String::from("Vector filter can only be used with the `exists` operator"),
)));
return Err(Error::UserError(UserError::InvalidFilter(String::from(
"Vector filter can only be used with the `exists` operator",
))));
}
let vector_filter = VectorFilter::parse(value)?;
let vector_filter = VectorFilter::parse(fid)?;
return vector_filter.evaluate(rtxn, index, universe);
}

View File

@ -1,16 +1,118 @@
use filter_parser::Token;
use roaring::RoaringBitmap;
use crate::error::{Error, UserError};
use crate::vector::{ArroyStats, ArroyWrapper};
use crate::{Index, Result};
use crate::Index;
pub(super) struct VectorFilter<'a> {
embedder_name: Option<&'a str>,
fragment_name: Option<&'a str>,
embedder_token: Option<Token<'a>>,
fragment_token: Option<Token<'a>>,
user_provided: bool,
// TODO: not_user_provided: bool,
}
#[derive(Debug)]
pub enum VectorFilterError<'a> {
EmptyFilter,
InvalidPrefix(Token<'a>),
MissingFragmentName(Token<'a>),
UserProvidedWithFragment(Token<'a>),
LeftoverToken(Token<'a>),
EmbedderDoesNotExist {
embedder: &'a Token<'a>,
available: Vec<String>,
},
FragmentDoesNotExist {
embedder: &'a Token<'a>,
fragment: &'a Token<'a>,
available: Vec<String>,
},
}
use VectorFilterError::*;
impl std::error::Error for VectorFilterError<'_> {}
impl std::fmt::Display for VectorFilterError<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
EmptyFilter => {
write!(f, "Vector filter cannot be empty.")
}
InvalidPrefix(prefix) => {
write!(
f,
"Vector filter must start with `_vectors` but found `{}`.",
prefix.value()
)
}
MissingFragmentName(_token) => {
write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")
}
UserProvidedWithFragment(_token) => {
write!(f, "Vector filter cannot specify both a fragment name and userProvided.")
}
LeftoverToken(token) => {
write!(f, "Vector filter has leftover token: `{}`.", token.value())
}
EmbedderDoesNotExist { embedder, available } => {
write!(f, "The embedder `{}` does not exist.", embedder.value())?;
if available.is_empty() {
write!(f, " This index does not have configured embedders.")
} else {
write!(f, " Available embedders are: ")?;
let mut available = available.clone();
available.sort_unstable();
for (idx, embedder) in available.iter().enumerate() {
write!(f, "`{embedder}`")?;
if idx != available.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, ".")
}
}
FragmentDoesNotExist { embedder, fragment, available } => {
write!(
f,
"The fragment `{}` does not exist on embedder `{}`.",
fragment.value(),
embedder.value(),
)?;
if available.is_empty() {
write!(f, " This embedder does not have configured fragments.")
} else {
write!(f, " Available fragments on this embedder are: ")?;
let mut available = available.clone();
available.sort_unstable();
for (idx, fragment) in available.iter().enumerate() {
write!(f, "`{fragment}`")?;
if idx != available.len() - 1 {
write!(f, ", ")?;
}
}
write!(f, ".")
}
}
}
}
}
impl<'a> From<VectorFilterError<'a>> for Error {
fn from(err: VectorFilterError<'a>) -> Self {
match &err {
EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())),
InvalidPrefix(token)
| MissingFragmentName(token)
| UserProvidedWithFragment(token)
| LeftoverToken(token) => token.clone().as_external_error(err).into(),
EmbedderDoesNotExist { embedder: token, .. }
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
}
}
}
impl<'a> VectorFilter<'a> {
pub(super) fn matches(value: &str) -> bool {
value.starts_with("_vectors.") || value == "_vectors"
@ -23,69 +125,72 @@ impl<'a> VectorFilter<'a> {
/// - `_vectors.{embedder_name}`
/// - `_vectors.{embedder_name}.userProvided`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
pub(super) fn parse(s: &'a str) -> Result<Self> {
let mut split = s.split('.').peekable();
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
let mut split = s.split(".").peekable();
if split.next() != Some("_vectors") {
return Err(Error::UserError(UserError::InvalidFilter(String::from(
"Vector filter must start with '_vectors'",
))));
match split.next() {
Some(token) if token.value() == "_vectors" => (),
Some(token) => return Err(InvalidPrefix(token)),
None => return Err(EmptyFilter),
}
let embedder_name = split.next();
let mut fragment_name = None;
if split.peek() == Some(&"fragments") {
split.next();
if split.peek().map(|t| t.value()) == Some("fragments") {
let token = split.next().expect("it was peeked before");
fragment_name = Some(split.next().ok_or_else(|| {
Error::UserError(UserError::InvalidFilter(
String::from("Vector filter is inconsistent: either specify a fragment name or remove the 'fragments' part"),
))
})?);
fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
}
let mut user_provided = false;
if split.peek() == Some(&"userProvided") || split.peek() == Some(&"user_provided") {
split.next();
user_provided = true;
let mut user_provided_token = None;
if split.peek().map(|t| t.value()) == Some("userProvided")
|| split.peek().map(|t| t.value()) == Some("user_provided")
{
user_provided_token = split.next();
}
if fragment_name.is_some() && user_provided {
return Err(Error::UserError(UserError::InvalidFilter(
String::from("Vector filter cannot specify both a fragment name and userProvided"),
)));
if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) {
return Err(UserProvidedWithFragment(user_provided_token.clone()))?;
}
if let Some(next) = split.next() {
return Err(Error::UserError(UserError::InvalidFilter(format!(
"Unexpected part in vector filter: '{next}'"
))));
return Err(LeftoverToken(next))?;
}
Ok(Self { embedder_name, fragment_name, user_provided })
Ok(Self {
embedder_token: embedder_name,
fragment_token: fragment_name,
user_provided: user_provided_token.is_some(),
})
}
pub(super) fn evaluate(
&self,
self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
) -> crate::Result<RoaringBitmap> {
let index_embedding_configs = index.embedding_configs();
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
let mut embedders = Vec::new();
if let Some(embedder_name) = self.embedder_name {
if let Some(embedder_token) = &self.embedder_token {
let embedder_name = embedder_token.value();
let Some(embedder_config) =
embedding_configs.iter().find(|config| config.name == embedder_name)
else {
return Ok(RoaringBitmap::new());
return Err(EmbedderDoesNotExist {
embedder: embedder_token,
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
})?;
};
let Some(embedder_info) =
index_embedding_configs.embedder_info(rtxn, embedder_name)?
let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)?
else {
return Ok(RoaringBitmap::new());
return Err(EmbedderDoesNotExist {
embedder: embedder_token,
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
})?;
};
embedders.push((embedder_config, embedder_info));
@ -108,14 +213,27 @@ impl<'a> VectorFilter<'a> {
embedder_config.config.quantized(),
);
let mut new_docids = if let Some(fragment_name) = self.fragment_name {
let mut new_docids = if let Some(fragment_token) = &self.fragment_token {
let fragment_name = fragment_token.value();
let Some(fragment_config) = embedder_config
.fragments
.as_slice()
.iter()
.find(|fragment| fragment.name == fragment_name)
else {
return Ok(RoaringBitmap::new());
return Err(FragmentDoesNotExist {
embedder: self
.embedder_token
.as_ref()
.expect("there can't be a fragment without an embedder"),
fragment: fragment_token,
available: embedder_config
.fragments
.as_slice()
.iter()
.map(|f| f.name.clone())
.collect(),
})?;
};
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?