mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Improve error handling
This commit is contained in:
@ -60,7 +60,7 @@ use nom::combinator::{cut, eof, map, opt};
|
|||||||
use nom::multi::{many0, separated_list1};
|
use nom::multi::{many0, separated_list1};
|
||||||
use nom::number::complete::recognize_float;
|
use nom::number::complete::recognize_float;
|
||||||
use nom::sequence::{delimited, preceded, terminated, tuple};
|
use nom::sequence::{delimited, preceded, terminated, tuple};
|
||||||
use nom::Finish;
|
use nom::{Finish, Slice};
|
||||||
use nom_locate::LocatedSpan;
|
use nom_locate::LocatedSpan;
|
||||||
pub(crate) use value::parse_value;
|
pub(crate) use value::parse_value;
|
||||||
use value::word_exact;
|
use value::word_exact;
|
||||||
@ -121,6 +121,16 @@ impl<'a> Token<'a> {
|
|||||||
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
|
Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Split the token by a delimiter and return an iterator of tokens.
|
||||||
|
/// Each token in the iterator will have its own span that corresponds to a slice of the original token's span.
|
||||||
|
pub fn split(&self, delimiter: &'a str) -> impl Iterator<Item = Token<'a>> + '_ {
|
||||||
|
let original_addr = self.value().as_ptr() as usize;
|
||||||
|
self.value().split(delimiter).map(move |part| {
|
||||||
|
let offset = part.as_ptr() as usize - original_addr;
|
||||||
|
Token::new(self.span.slice(offset..offset + part.len()), Some(part.to_string()))
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> From<Span<'a>> for Token<'a> {
|
impl<'a> From<Span<'a>> for Token<'a> {
|
||||||
@ -604,6 +614,8 @@ impl std::fmt::Display for Token<'_> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub mod tests {
|
pub mod tests {
|
||||||
|
use std::fmt::format;
|
||||||
|
|
||||||
use FilterCondition as Fc;
|
use FilterCondition as Fc;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
@ -1043,4 +1055,103 @@ pub mod tests {
|
|||||||
let token: Token = s.into();
|
let token: Token = s.into();
|
||||||
assert_eq!(token.value(), s);
|
assert_eq!(token.value(), s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn split() {
|
||||||
|
let s = "test string that should not be parsed\n newline";
|
||||||
|
let token: Token = s.into();
|
||||||
|
let parts: Vec<_> = token.split(" ").collect();
|
||||||
|
insta::assert_snapshot!(format!("{parts:#?}"), @r#"
|
||||||
|
[
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 0,
|
||||||
|
line: 1,
|
||||||
|
fragment: "test",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"test",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 5,
|
||||||
|
line: 1,
|
||||||
|
fragment: "string",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"string",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 12,
|
||||||
|
line: 1,
|
||||||
|
fragment: "that",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"that",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 17,
|
||||||
|
line: 1,
|
||||||
|
fragment: "should",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"should",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 24,
|
||||||
|
line: 1,
|
||||||
|
fragment: "not",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"not",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 28,
|
||||||
|
line: 1,
|
||||||
|
fragment: "be",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"be",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 31,
|
||||||
|
line: 1,
|
||||||
|
fragment: "parsed\n",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"parsed\n",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
Token {
|
||||||
|
span: LocatedSpan {
|
||||||
|
offset: 39,
|
||||||
|
line: 2,
|
||||||
|
fragment: "newline",
|
||||||
|
extra: "test string that should not be parsed\n newline",
|
||||||
|
},
|
||||||
|
value: Some(
|
||||||
|
"newline",
|
||||||
|
),
|
||||||
|
},
|
||||||
|
]
|
||||||
|
"#);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -736,10 +736,12 @@ async fn test_filterable_attributes_priority() {
|
|||||||
async fn test_vector_filter() {
|
async fn test_vector_filter() {
|
||||||
let index = crate::vector::shared_index_for_fragments().await;
|
let index = crate::vector::shared_index_for_fragments().await;
|
||||||
|
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [
|
"hits": [
|
||||||
@ -764,42 +766,44 @@ async fn test_vector_filter() {
|
|||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors.other EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors.other EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [],
|
"message": "Index `[uuid]`: The embedder `other` does not exist. Available embedders are: `rest`.\n10:15 _vectors.other EXISTS",
|
||||||
"query": "",
|
"code": "invalid_search_filter",
|
||||||
"processingTimeMs": "[duration]",
|
"type": "invalid_request",
|
||||||
"limit": 20,
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
"offset": 0,
|
|
||||||
"estimatedTotalHits": 0
|
|
||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
// This one is counterintuitive, but it is the same as the previous one.
|
// This one is counterintuitive, but it is the same as the previous one.
|
||||||
// It's because userProvided is interpreted as an embedder name
|
// It's because userProvided is interpreted as an embedder name
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors.userProvided EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors.userProvided EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [],
|
"message": "Index `[uuid]`: The embedder `userProvided` does not exist. Available embedders are: `rest`.\n10:22 _vectors.userProvided EXISTS",
|
||||||
"query": "",
|
"code": "invalid_search_filter",
|
||||||
"processingTimeMs": "[duration]",
|
"type": "invalid_request",
|
||||||
"limit": 20,
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
"offset": 0,
|
|
||||||
"estimatedTotalHits": 0
|
|
||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors.rest EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors.rest EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [
|
"hits": [
|
||||||
@ -824,10 +828,12 @@ async fn test_vector_filter() {
|
|||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors.rest.userProvided EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors.rest.userProvided EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [
|
"hits": [
|
||||||
@ -843,10 +849,12 @@ async fn test_vector_filter() {
|
|||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors.rest.fragments.withBreed EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors.rest.fragments.withBreed EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [
|
"hits": [
|
||||||
@ -864,11 +872,13 @@ async fn test_vector_filter() {
|
|||||||
"estimatedTotalHits": 2
|
"estimatedTotalHits": 2
|
||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
let (value, _code) = index.search_post(json!({
|
let (value, _code) = index
|
||||||
"filter": "_vectors.rest.fragments.basic EXISTS",
|
.search_post(json!({
|
||||||
"attributesToRetrieve": ["id"]
|
"filter": "_vectors.rest.fragments.basic EXISTS",
|
||||||
})).await;
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
snapshot!(value, @r#"
|
snapshot!(value, @r#"
|
||||||
{
|
{
|
||||||
"hits": [
|
"hits": [
|
||||||
@ -892,4 +902,19 @@ async fn test_vector_filter() {
|
|||||||
"estimatedTotalHits": 4
|
"estimatedTotalHits": 4
|
||||||
}
|
}
|
||||||
"#);
|
"#);
|
||||||
|
|
||||||
|
let (value, _code) = index
|
||||||
|
.search_post(json!({
|
||||||
|
"filter": "_vectors.rest.fragments.other EXISTS",
|
||||||
|
"attributesToRetrieve": ["id"]
|
||||||
|
}))
|
||||||
|
.await;
|
||||||
|
snapshot!(value, @r#"
|
||||||
|
{
|
||||||
|
"message": "Index `[uuid]`: The fragment `other` does not exist on embedder `rest`. Available fragments on this embedder are: `basic`, `withBreed`.\n25:30 _vectors.rest.fragments.other EXISTS",
|
||||||
|
"code": "invalid_search_filter",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||||
|
}
|
||||||
|
"#);
|
||||||
}
|
}
|
||||||
|
@ -241,7 +241,8 @@ impl<'a> Filter<'a> {
|
|||||||
|
|
||||||
let attribute = fid.value();
|
let attribute = fid.value();
|
||||||
if matching_features(attribute, &filterable_attributes_rules)
|
if matching_features(attribute, &filterable_attributes_rules)
|
||||||
.is_some_and(|(_, features)| features.is_filterable()) || VectorFilter::matches(attribute)
|
.is_some_and(|(_, features)| features.is_filterable())
|
||||||
|
|| VectorFilter::matches(attribute)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -548,11 +549,11 @@ impl<'a> Filter<'a> {
|
|||||||
let value = fid.value();
|
let value = fid.value();
|
||||||
if VectorFilter::matches(value) {
|
if VectorFilter::matches(value) {
|
||||||
if !matches!(op, Condition::Exists) {
|
if !matches!(op, Condition::Exists) {
|
||||||
return Err(Error::UserError(UserError::InvalidFilter(
|
return Err(Error::UserError(UserError::InvalidFilter(String::from(
|
||||||
String::from("Vector filter can only be used with the `exists` operator"),
|
"Vector filter can only be used with the `exists` operator",
|
||||||
)));
|
))));
|
||||||
}
|
}
|
||||||
let vector_filter = VectorFilter::parse(value)?;
|
let vector_filter = VectorFilter::parse(fid)?;
|
||||||
return vector_filter.evaluate(rtxn, index, universe);
|
return vector_filter.evaluate(rtxn, index, universe);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,16 +1,118 @@
|
|||||||
|
use filter_parser::Token;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::vector::{ArroyStats, ArroyWrapper};
|
use crate::vector::{ArroyStats, ArroyWrapper};
|
||||||
use crate::{Index, Result};
|
use crate::Index;
|
||||||
|
|
||||||
pub(super) struct VectorFilter<'a> {
|
pub(super) struct VectorFilter<'a> {
|
||||||
embedder_name: Option<&'a str>,
|
embedder_token: Option<Token<'a>>,
|
||||||
fragment_name: Option<&'a str>,
|
fragment_token: Option<Token<'a>>,
|
||||||
user_provided: bool,
|
user_provided: bool,
|
||||||
// TODO: not_user_provided: bool,
|
// TODO: not_user_provided: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum VectorFilterError<'a> {
|
||||||
|
EmptyFilter,
|
||||||
|
InvalidPrefix(Token<'a>),
|
||||||
|
MissingFragmentName(Token<'a>),
|
||||||
|
UserProvidedWithFragment(Token<'a>),
|
||||||
|
LeftoverToken(Token<'a>),
|
||||||
|
EmbedderDoesNotExist {
|
||||||
|
embedder: &'a Token<'a>,
|
||||||
|
available: Vec<String>,
|
||||||
|
},
|
||||||
|
FragmentDoesNotExist {
|
||||||
|
embedder: &'a Token<'a>,
|
||||||
|
fragment: &'a Token<'a>,
|
||||||
|
available: Vec<String>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
use VectorFilterError::*;
|
||||||
|
|
||||||
|
impl std::error::Error for VectorFilterError<'_> {}
|
||||||
|
|
||||||
|
impl std::fmt::Display for VectorFilterError<'_> {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
EmptyFilter => {
|
||||||
|
write!(f, "Vector filter cannot be empty.")
|
||||||
|
}
|
||||||
|
InvalidPrefix(prefix) => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"Vector filter must start with `_vectors` but found `{}`.",
|
||||||
|
prefix.value()
|
||||||
|
)
|
||||||
|
}
|
||||||
|
MissingFragmentName(_token) => {
|
||||||
|
write!(f, "Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")
|
||||||
|
}
|
||||||
|
UserProvidedWithFragment(_token) => {
|
||||||
|
write!(f, "Vector filter cannot specify both a fragment name and userProvided.")
|
||||||
|
}
|
||||||
|
LeftoverToken(token) => {
|
||||||
|
write!(f, "Vector filter has leftover token: `{}`.", token.value())
|
||||||
|
}
|
||||||
|
EmbedderDoesNotExist { embedder, available } => {
|
||||||
|
write!(f, "The embedder `{}` does not exist.", embedder.value())?;
|
||||||
|
if available.is_empty() {
|
||||||
|
write!(f, " This index does not have configured embedders.")
|
||||||
|
} else {
|
||||||
|
write!(f, " Available embedders are: ")?;
|
||||||
|
let mut available = available.clone();
|
||||||
|
available.sort_unstable();
|
||||||
|
for (idx, embedder) in available.iter().enumerate() {
|
||||||
|
write!(f, "`{embedder}`")?;
|
||||||
|
if idx != available.len() - 1 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
write!(f, ".")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
FragmentDoesNotExist { embedder, fragment, available } => {
|
||||||
|
write!(
|
||||||
|
f,
|
||||||
|
"The fragment `{}` does not exist on embedder `{}`.",
|
||||||
|
fragment.value(),
|
||||||
|
embedder.value(),
|
||||||
|
)?;
|
||||||
|
if available.is_empty() {
|
||||||
|
write!(f, " This embedder does not have configured fragments.")
|
||||||
|
} else {
|
||||||
|
write!(f, " Available fragments on this embedder are: ")?;
|
||||||
|
let mut available = available.clone();
|
||||||
|
available.sort_unstable();
|
||||||
|
for (idx, fragment) in available.iter().enumerate() {
|
||||||
|
write!(f, "`{fragment}`")?;
|
||||||
|
if idx != available.len() - 1 {
|
||||||
|
write!(f, ", ")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
write!(f, ".")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> From<VectorFilterError<'a>> for Error {
|
||||||
|
fn from(err: VectorFilterError<'a>) -> Self {
|
||||||
|
match &err {
|
||||||
|
EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())),
|
||||||
|
InvalidPrefix(token)
|
||||||
|
| MissingFragmentName(token)
|
||||||
|
| UserProvidedWithFragment(token)
|
||||||
|
| LeftoverToken(token) => token.clone().as_external_error(err).into(),
|
||||||
|
EmbedderDoesNotExist { embedder: token, .. }
|
||||||
|
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> VectorFilter<'a> {
|
impl<'a> VectorFilter<'a> {
|
||||||
pub(super) fn matches(value: &str) -> bool {
|
pub(super) fn matches(value: &str) -> bool {
|
||||||
value.starts_with("_vectors.") || value == "_vectors"
|
value.starts_with("_vectors.") || value == "_vectors"
|
||||||
@ -23,71 +125,74 @@ impl<'a> VectorFilter<'a> {
|
|||||||
/// - `_vectors.{embedder_name}`
|
/// - `_vectors.{embedder_name}`
|
||||||
/// - `_vectors.{embedder_name}.userProvided`
|
/// - `_vectors.{embedder_name}.userProvided`
|
||||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
||||||
pub(super) fn parse(s: &'a str) -> Result<Self> {
|
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
|
||||||
let mut split = s.split('.').peekable();
|
let mut split = s.split(".").peekable();
|
||||||
|
|
||||||
if split.next() != Some("_vectors") {
|
match split.next() {
|
||||||
return Err(Error::UserError(UserError::InvalidFilter(String::from(
|
Some(token) if token.value() == "_vectors" => (),
|
||||||
"Vector filter must start with '_vectors'",
|
Some(token) => return Err(InvalidPrefix(token)),
|
||||||
))));
|
None => return Err(EmptyFilter),
|
||||||
}
|
}
|
||||||
|
|
||||||
let embedder_name = split.next();
|
let embedder_name = split.next();
|
||||||
|
|
||||||
let mut fragment_name = None;
|
let mut fragment_name = None;
|
||||||
if split.peek() == Some(&"fragments") {
|
if split.peek().map(|t| t.value()) == Some("fragments") {
|
||||||
split.next();
|
let token = split.next().expect("it was peeked before");
|
||||||
|
|
||||||
fragment_name = Some(split.next().ok_or_else(|| {
|
fragment_name = Some(split.next().ok_or(MissingFragmentName(token))?);
|
||||||
Error::UserError(UserError::InvalidFilter(
|
|
||||||
String::from("Vector filter is inconsistent: either specify a fragment name or remove the 'fragments' part"),
|
|
||||||
))
|
|
||||||
})?);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut user_provided = false;
|
let mut user_provided_token = None;
|
||||||
if split.peek() == Some(&"userProvided") || split.peek() == Some(&"user_provided") {
|
if split.peek().map(|t| t.value()) == Some("userProvided")
|
||||||
split.next();
|
|| split.peek().map(|t| t.value()) == Some("user_provided")
|
||||||
user_provided = true;
|
{
|
||||||
|
user_provided_token = split.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
if fragment_name.is_some() && user_provided {
|
if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) {
|
||||||
return Err(Error::UserError(UserError::InvalidFilter(
|
return Err(UserProvidedWithFragment(user_provided_token.clone()))?;
|
||||||
String::from("Vector filter cannot specify both a fragment name and userProvided"),
|
|
||||||
)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(next) = split.next() {
|
if let Some(next) = split.next() {
|
||||||
return Err(Error::UserError(UserError::InvalidFilter(format!(
|
return Err(LeftoverToken(next))?;
|
||||||
"Unexpected part in vector filter: '{next}'"
|
|
||||||
))));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self { embedder_name, fragment_name, user_provided })
|
Ok(Self {
|
||||||
|
embedder_token: embedder_name,
|
||||||
|
fragment_token: fragment_name,
|
||||||
|
user_provided: user_provided_token.is_some(),
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn evaluate(
|
pub(super) fn evaluate(
|
||||||
&self,
|
self,
|
||||||
rtxn: &heed::RoTxn<'_>,
|
rtxn: &heed::RoTxn<'_>,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
universe: Option<&RoaringBitmap>,
|
universe: Option<&RoaringBitmap>,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> crate::Result<RoaringBitmap> {
|
||||||
let index_embedding_configs = index.embedding_configs();
|
let index_embedding_configs = index.embedding_configs();
|
||||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||||
|
|
||||||
let mut embedders = Vec::new();
|
let mut embedders = Vec::new();
|
||||||
if let Some(embedder_name) = self.embedder_name {
|
if let Some(embedder_token) = &self.embedder_token {
|
||||||
|
let embedder_name = embedder_token.value();
|
||||||
let Some(embedder_config) =
|
let Some(embedder_config) =
|
||||||
embedding_configs.iter().find(|config| config.name == embedder_name)
|
embedding_configs.iter().find(|config| config.name == embedder_name)
|
||||||
else {
|
else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Err(EmbedderDoesNotExist {
|
||||||
|
embedder: embedder_token,
|
||||||
|
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
|
||||||
|
})?;
|
||||||
};
|
};
|
||||||
let Some(embedder_info) =
|
let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)?
|
||||||
index_embedding_configs.embedder_info(rtxn, embedder_name)?
|
|
||||||
else {
|
else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Err(EmbedderDoesNotExist {
|
||||||
|
embedder: embedder_token,
|
||||||
|
available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
|
||||||
|
})?;
|
||||||
};
|
};
|
||||||
|
|
||||||
embedders.push((embedder_config, embedder_info));
|
embedders.push((embedder_config, embedder_info));
|
||||||
} else {
|
} else {
|
||||||
for embedder_config in embedding_configs.iter() {
|
for embedder_config in embedding_configs.iter() {
|
||||||
@ -99,7 +204,7 @@ impl<'a> VectorFilter<'a> {
|
|||||||
embedders.push((embedder_config, embedder_info));
|
embedders.push((embedder_config, embedder_info));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for (embedder_config, embedder_info) in embedders {
|
for (embedder_config, embedder_info) in embedders {
|
||||||
let arroy_wrapper = ArroyWrapper::new(
|
let arroy_wrapper = ArroyWrapper::new(
|
||||||
@ -108,14 +213,27 @@ impl<'a> VectorFilter<'a> {
|
|||||||
embedder_config.config.quantized(),
|
embedder_config.config.quantized(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut new_docids = if let Some(fragment_name) = self.fragment_name {
|
let mut new_docids = if let Some(fragment_token) = &self.fragment_token {
|
||||||
|
let fragment_name = fragment_token.value();
|
||||||
let Some(fragment_config) = embedder_config
|
let Some(fragment_config) = embedder_config
|
||||||
.fragments
|
.fragments
|
||||||
.as_slice()
|
.as_slice()
|
||||||
.iter()
|
.iter()
|
||||||
.find(|fragment| fragment.name == fragment_name)
|
.find(|fragment| fragment.name == fragment_name)
|
||||||
else {
|
else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Err(FragmentDoesNotExist {
|
||||||
|
embedder: self
|
||||||
|
.embedder_token
|
||||||
|
.as_ref()
|
||||||
|
.expect("there can't be a fragment without an embedder"),
|
||||||
|
fragment: fragment_token,
|
||||||
|
available: embedder_config
|
||||||
|
.fragments
|
||||||
|
.as_slice()
|
||||||
|
.iter()
|
||||||
|
.map(|f| f.name.clone())
|
||||||
|
.collect(),
|
||||||
|
})?;
|
||||||
};
|
};
|
||||||
|
|
||||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||||
|
Reference in New Issue
Block a user