mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Improve error handling
This commit is contained in:
@ -60,7 +60,7 @@ use nom::combinator::{cut, eof, map, opt};
|
||||
use nom::multi::{many0, separated_list1};
|
||||
use nom::number::complete::recognize_float;
|
||||
use nom::sequence::{delimited, preceded, terminated, tuple};
|
||||
use nom::Finish;
|
||||
use nom::{Finish, Slice};
|
||||
use nom_locate::LocatedSpan;
|
||||
pub(crate) use value::parse_value;
|
||||
use value::word_exact;
|
||||
@ -121,6 +121,16 @@ impl<'a> Token<'a> {
|
||||
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
|
||||
}
|
||||
}
|
||||
|
||||
/// Split the token by a delimiter and return an iterator of tokens.
|
||||
/// Each token in the iterator will have its own span that corresponds to a slice of the original token's span.
|
||||
pub fn split(&self, delimiter: &'a str) -> impl Iterator<Item = Token<'a>> + '_ {
|
||||
let original_addr = self.value().as_ptr() as usize;
|
||||
self.value().split(delimiter).map(move |part| {
|
||||
let offset = part.as_ptr() as usize - original_addr;
|
||||
Token::new(self.span.slice(offset..offset + part.len()), Some(part.to_string()))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Span<'a>> for Token<'a> {
|
||||
@ -604,6 +614,8 @@ impl std::fmt::Display for Token<'_> {
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use std::fmt::format;
|
||||
|
||||
use FilterCondition as Fc;
|
||||
|
||||
use super::*;
|
||||
@ -1043,4 +1055,103 @@ pub mod tests {
|
||||
let token: Token = s.into();
|
||||
assert_eq!(token.value(), s);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn split() {
|
||||
let s = "test string that should not be parsed\n newline";
|
||||
let token: Token = s.into();
|
||||
let parts: Vec<_> = token.split(" ").collect();
|
||||
insta::assert_snapshot!(format!("{parts:#?}"), @r#"
|
||||
[
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 0,
|
||||
line: 1,
|
||||
fragment: "test",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"test",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 5,
|
||||
line: 1,
|
||||
fragment: "string",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"string",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 12,
|
||||
line: 1,
|
||||
fragment: "that",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"that",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 17,
|
||||
line: 1,
|
||||
fragment: "should",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"should",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 24,
|
||||
line: 1,
|
||||
fragment: "not",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"not",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 28,
|
||||
line: 1,
|
||||
fragment: "be",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"be",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 31,
|
||||
line: 1,
|
||||
fragment: "parsed\n",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"parsed\n",
|
||||
),
|
||||
},
|
||||
Token {
|
||||
span: LocatedSpan {
|
||||
offset: 39,
|
||||
line: 2,
|
||||
fragment: "newline",
|
||||
extra: "test string that should not be parsed\n newline",
|
||||
},
|
||||
value: Some(
|
||||
"newline",
|
||||
),
|
||||
},
|
||||
]
|
||||
"#);
|
||||
}
|
||||
}
|
||||
|
@ -736,10 +736,12 @@ async fn test_filterable_attributes_priority() {
|
||||
async fn test_vector_filter() {
|
||||
let index = crate::vector::shared_index_for_fragments().await;
|
||||
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [
|
||||
@ -764,42 +766,44 @@ async fn test_vector_filter() {
|
||||
}
|
||||
"#);
|
||||
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors.other EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.other EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [],
|
||||
"query": "",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 0
|
||||
"message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"#);
|
||||
|
||||
// This one is counterintuitive, but it is the same as the previous one.
|
||||
// It's because userProvided is interpreted as an embedder name
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors.userProvided EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.userProvided EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [],
|
||||
"query": "",
|
||||
"processingTimeMs": "[duration]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 0
|
||||
"message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"#);
|
||||
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors.rest EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.rest EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [
|
||||
@ -824,10 +828,12 @@ async fn test_vector_filter() {
|
||||
}
|
||||
"#);
|
||||
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors.rest.userProvided EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.rest.userProvided EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [
|
||||
@ -843,10 +849,12 @@ async fn test_vector_filter() {
|
||||
}
|
||||
"#);
|
||||
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors.rest.fragments.withBreed EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.rest.fragments.withBreed EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [
|
||||
@ -865,10 +873,12 @@ async fn test_vector_filter() {
|
||||
}
|
||||
"#);
|
||||
|
||||
let (value, _code) = index.search_post(json!({
|
||||
"filter": "_vectors.rest.fragments.basic EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
})).await;
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.rest.fragments.basic EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"hits": [
|
||||
@ -892,4 +902,19 @@ async fn test_vector_filter() {
|
||||
"estimatedTotalHits": 4
|
||||
}
|
||||
"#);
|
||||
|
||||
let (value, _code) = index
|
||||
.search_post(json!({
|
||||
"filter": "_vectors.rest.fragments.other EXISTS",
|
||||
"attributesToRetrieve": ["id"]
|
||||
}))
|
||||
.await;
|
||||
snapshot!(value, @r#"
|
||||
{
|
||||
"message": "Index `[uuid]`: The fragment `other` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`.\n25:30 _vectors.rest.fragments.other EXISTS",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"#);
|
||||
}
|
||||
|
@ -241,7 +241,8 @@ impl<'a> Filter<'a> {
|
||||
|
||||
let attribute = fid.value();
|
||||
if matching_features(attribute, &filterable_attributes_rules)
|
||||
.is_some_and(|(_, features)| features.is_filterable()) || VectorFilter::matches(attribute)
|
||||
.is_some_and(|(_, features)| features.is_filterable())
|
||||
|| VectorFilter::matches(attribute)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@ -548,11 +549,11 @@ impl<'a> Filter<'a> {
|
||||
let value = fid.value();
|
||||
if VectorFilter::matches(value) {
|
||||
if !matches!(op, Condition::Exists) {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(
|
||||
String::from("Vector filter can only be used with the `exists` operator"),
|
||||
)));
|
||||
return Err(Error::UserError(UserError::InvalidFilter(String::from(
|
||||
"Vector filter can only be used with the `exists` operator",
|
||||
))));
|
||||
}
|
||||
let vector_filter = VectorFilter::parse(value)?;
|
||||
let vector_filter = VectorFilter::parse(fid)?;
|
||||
return vector_filter.evaluate(rtxn, index, universe);
|
||||
}
|
||||
|
||||
|
@ -1,16 +1,118 @@
|
||||
use filter_parser::Token;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::vector::{ArroyStats, ArroyWrapper};
|
||||
use crate::{Index, Result};
|
||||
use crate::Index;
|
||||
|
||||
pub(super) struct VectorFilter<'a> {
|
||||
embedder_name: Option<&'a str>,
|
||||
fragment_name: Option<&'a str>,
|
||||
embedder_token: Option<Token<'a>>,
|
||||
fragment_token: Option<Token<'a>>,
|
||||
user_provided: bool,
|
||||
// TODO: not_user_provided: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum VectorFilterError<'a> {
|
||||
EmptyFilter,
|
||||
InvalidPrefix(Token<'a>),
|
||||
MissingFragmentName(Token<'a>),
|
||||
UserProvidedWithFragment(Token<'a>),
|
||||
LeftoverToken(Token<'a>),
|
||||
EmbedderDoesNotExist {
|
||||
embedder: &'a Token<'a>,
|
||||
available: Vec<String>,
|
||||
},
|
||||
FragmentDoesNotExist {
|
||||
embedder: &'a Token<'a>,
|
||||
fragment: &'a Token<'a>,
|
||||
available: Vec<String>,
|
||||
},
|
||||
}
|
||||
|
||||
use VectorFilterError::*;
|
||||
|
||||
impl std::error::Error for VectorFilterError<'_> {}
|
||||
|
||||
impl std::fmt::Display for VectorFilterError<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
EmptyFilter => {
|
||||
write!(f, "Vector filter cannot be empty.")
|
||||
}
|
||||
InvalidPrefix(prefix) => {
|
||||
write!(
|
||||
f,
|
||||
"Vector filter must start with `_vectors` but found `{}`.",
|
||||
prefix.value()
|
||||
)
|
||||
}
|
||||
MissingFragmentName(_token) => {
|
||||
write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")
|
||||
}
|
||||
UserProvidedWithFragment(_token) => {
|
||||
write!(f, "Vector filter cannot specify both a fragment name and userProvided.")
|
||||
}
|
||||
LeftoverToken(token) => {
|
||||
write!(f, "Vector filter has leftover token: `{}`.", token.value())
|
||||
}
|
||||
EmbedderDoesNotExist { embedder, available } => {
|
||||
write!(f, "The embedder `{}` does not exist.", embedder.value())?;
|
||||
if available.is_empty() {
|
||||
write!(f, " This index does not have configured embedders.")
|
||||
} else {
|
||||
write!(f, " Available embedders are: ")?;
|
||||
let mut available = available.clone();
|
||||
available.sort_unstable();
|
||||
for (idx, embedder) in available.iter().enumerate() {
|
||||
write!(f, "`{embedder}`")?;
|
||||
if idx != available.len() - 1 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
}
|
||||
write!(f, ".")
|
||||
}
|
||||
}
|
||||
FragmentDoesNotExist { embedder, fragment, available } => {
|
||||
write!(
|
||||
f,
|
||||
"The fragment `{}` does not exist on embedder `{}`.",
|
||||
fragment.value(),
|
||||
embedder.value(),
|
||||
)?;
|
||||
if available.is_empty() {
|
||||
write!(f, " This embedder does not have configured fragments.")
|
||||
} else {
|
||||
write!(f, " Available fragments on this embedder are: ")?;
|
||||
let mut available = available.clone();
|
||||
available.sort_unstable();
|
||||
for (idx, fragment) in available.iter().enumerate() {
|
||||
write!(f, "`{fragment}`")?;
|
||||
if idx != available.len() - 1 {
|
||||
write!(f, ", ")?;
|
||||
}
|
||||
}
|
||||
write!(f, ".")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<VectorFilterError<'a>> for Error {
|
||||
fn from(err: VectorFilterError<'a>) -> Self {
|
||||
match &err {
|
||||
EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())),
|
||||
InvalidPrefix(token)
|
||||
| MissingFragmentName(token)
|
||||
| UserProvidedWithFragment(token)
|
||||
| LeftoverToken(token) => token.clone().as_external_error(err).into(),
|
||||
EmbedderDoesNotExist { embedder: token, .. }
|
||||
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> VectorFilter<'a> {
|
||||
pub(super) fn matches(value: &str) -> bool {
|
||||
value.starts_with("_vectors.") || value == "_vectors"
|
||||
@ -23,69 +125,72 @@ impl<'a> VectorFilter<'a> {
|
||||
/// - `_vectors.{embedder_name}`
|
||||
/// - `_vectors.{embedder_name}.userProvided`
|
||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
||||
pub(super) fn parse(s: &'a str) -> Result<Self> {
|
||||
let mut split = s.split('.').peekable();
|
||||
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
|
||||
let mut split = s.split(".").peekable();
|
||||
|
||||
if split.next() != Some("_vectors") {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(String::from(
|
||||
"Vector filter must start with '_vectors'",
|
||||
))));
|
||||
match split.next() {
|
||||
Some(token) if token.value() == "_vectors" => (),
|
||||
Some(token) => return Err(InvalidPrefix(token)),
|
||||
None => return Err(EmptyFilter),
|
||||
}
|
||||
|
||||
let embedder_name = split.next();
|
||||
|
||||
let mut fragment_name = None;
|
||||
if split.peek() == Some(&"fragments") {
|
||||
split.next();
|
||||
if split.peek().map(|t| t.value()) == Some("fragments") {
|
||||
let token = split.next().expect("it was peeked before");
|
||||
|
||||
fragment_name = Some(split.next().ok_or_else(|| {
|
||||
Error::UserError(UserError::InvalidFilter(
|
||||
String::from("Vector filter is inconsistent: either specify a fragment name or remove the 'fragments' part"),
|
||||
))
|
||||
})?);
|
||||
fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
|
||||
}
|
||||
|
||||
let mut user_provided = false;
|
||||
if split.peek() == Some(&"userProvided") || split.peek() == Some(&"user_provided") {
|
||||
split.next();
|
||||
user_provided = true;
|
||||
let mut user_provided_token = None;
|
||||
if split.peek().map(|t| t.value()) == Some("userProvided")
|
||||
|| split.peek().map(|t| t.value()) == Some("user_provided")
|
||||
{
|
||||
user_provided_token = split.next();
|
||||
}
|
||||
|
||||
if fragment_name.is_some() && user_provided {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(
|
||||
String::from("Vector filter cannot specify both a fragment name and userProvided"),
|
||||
)));
|
||||
if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) {
|
||||
return Err(UserProvidedWithFragment(user_provided_token.clone()))?;
|
||||
}
|
||||
|
||||
if let Some(next) = split.next() {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(format!(
|
||||
"Unexpected part in vector filter: '{next}'"
|
||||
))));
|
||||
return Err(LeftoverToken(next))?;
|
||||
}
|
||||
|
||||
Ok(Self { embedder_name, fragment_name, user_provided })
|
||||
Ok(Self {
|
||||
embedder_token: embedder_name,
|
||||
fragment_token: fragment_name,
|
||||
user_provided: user_provided_token.is_some(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn evaluate(
|
||||
&self,
|
||||
self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let index_embedding_configs = index.embedding_configs();
|
||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||
|
||||
let mut embedders = Vec::new();
|
||||
if let Some(embedder_name) = self.embedder_name {
|
||||
if let Some(embedder_token) = &self.embedder_token {
|
||||
let embedder_name = embedder_token.value();
|
||||
let Some(embedder_config) =
|
||||
embedding_configs.iter().find(|config| config.name == embedder_name)
|
||||
else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
return Err(EmbedderDoesNotExist {
|
||||
embedder: embedder_token,
|
||||
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
|
||||
})?;
|
||||
};
|
||||
let Some(embedder_info) =
|
||||
index_embedding_configs.embedder_info(rtxn, embedder_name)?
|
||||
let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)?
|
||||
else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
return Err(EmbedderDoesNotExist {
|
||||
embedder: embedder_token,
|
||||
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
|
||||
})?;
|
||||
};
|
||||
|
||||
embedders.push((embedder_config, embedder_info));
|
||||
@ -108,14 +213,27 @@ impl<'a> VectorFilter<'a> {
|
||||
embedder_config.config.quantized(),
|
||||
);
|
||||
|
||||
let mut new_docids = if let Some(fragment_name) = self.fragment_name {
|
||||
let mut new_docids = if let Some(fragment_token) = &self.fragment_token {
|
||||
let fragment_name = fragment_token.value();
|
||||
let Some(fragment_config) = embedder_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.find(|fragment| fragment.name == fragment_name)
|
||||
else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
return Err(FragmentDoesNotExist {
|
||||
embedder: self
|
||||
.embedder_token
|
||||
.as_ref()
|
||||
.expect("there can't be a fragment without an embedder"),
|
||||
fragment: fragment_token,
|
||||
available: embedder_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|f| f.name.clone())
|
||||
.collect(),
|
||||
})?;
|
||||
};
|
||||
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||
|
Reference in New Issue
Block a user