mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-04 03:36:30 +00:00
Switch to a nom parser
This commit is contained in:
@ -10,8 +10,8 @@ use memchr::memmem::Finder;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use serde_json::Value;
|
||||
|
||||
use super::{facet_range_search, filter_vector::VectorFilter};
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use super::facet_range_search;
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
||||
use crate::heed_codec::facet::{
|
||||
@ -230,7 +230,7 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
|
||||
pub fn use_vector_filter(&self) -> Option<&Token> {
|
||||
self.condition.use_vector_filter()
|
||||
dbg!(self.condition.use_vector_filter())
|
||||
}
|
||||
}
|
||||
|
||||
@ -241,10 +241,10 @@ impl<'a> Filter<'a> {
|
||||
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
|
||||
|
||||
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
|
||||
let attribute = fid.value();
|
||||
let attribute = dbg!(fid.value());
|
||||
if matching_features(attribute, &filterable_attributes_rules)
|
||||
.is_some_and(|(_, features)| features.is_filterable())
|
||||
|| VectorFilter::matches(attribute)
|
||||
|| attribute == RESERVED_VECTORS_FIELD_NAME
|
||||
{
|
||||
continue;
|
||||
}
|
||||
@ -549,16 +549,6 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
FilterCondition::Condition { fid, op } => {
|
||||
let value = fid.value();
|
||||
if VectorFilter::matches(value) {
|
||||
if !matches!(op, Condition::Exists) {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(String::from(
|
||||
"Vector filter can only be used with the `exists` operator",
|
||||
))));
|
||||
}
|
||||
let vector_filter = VectorFilter::parse(fid)?;
|
||||
return vector_filter.evaluate(rtxn, index, universe);
|
||||
}
|
||||
|
||||
let Some(field_id) = field_ids_map.id(value) else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
@ -616,6 +606,9 @@ impl<'a> Filter<'a> {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
}
|
||||
FilterCondition::VectorExists { fid: _, embedder, filter } => {
|
||||
super::filter_vector::evaluate(rtxn, index, universe, embedder.clone(), filter)
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
if index.is_geo_filtering_enabled(rtxn)? {
|
||||
let base_point: [f64; 2] =
|
||||
|
@ -1,45 +1,13 @@
|
||||
use filter_parser::Token;
|
||||
use filter_parser::{Token, VectorFilter};
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::error::Error;
|
||||
use crate::vector::db::IndexEmbeddingConfig;
|
||||
use crate::vector::{ArroyStats, ArroyWrapper};
|
||||
use crate::Index;
|
||||
|
||||
#[derive(Debug)]
|
||||
enum VectorFilterInner<'a> {
|
||||
Fragment(Token<'a>),
|
||||
DocumentTemplate,
|
||||
UserProvided,
|
||||
Regenerate,
|
||||
None,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(super) struct VectorFilter<'a> {
|
||||
embedder: Option<Token<'a>>,
|
||||
inner: VectorFilterInner<'a>,
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum VectorFilterError<'a> {
|
||||
#[error("Vector filter cannot be empty.")]
|
||||
EmptyFilter,
|
||||
|
||||
#[error("Vector filter must start with `_vectors` but found `{}`.", _0.value())]
|
||||
InvalidPrefix(Token<'a>),
|
||||
|
||||
#[error("Vector filter is inconsistent: either specify a fragment name or remove the `fragments` part.")]
|
||||
MissingFragmentName(Token<'a>),
|
||||
|
||||
#[error("Vector filter cannot have both {}.", {
|
||||
_0.iter().map(|t| format!("`{}`", t.value())).collect::<Vec<_>>().join(" and ")
|
||||
})]
|
||||
ExclusiveOptions(Vec<Token<'a>>),
|
||||
|
||||
#[error("Vector filter has leftover token: `{}`.", _0.value())]
|
||||
LeftoverToken(Token<'a>),
|
||||
|
||||
#[error("The embedder `{}` does not exist. {}", embedder.value(), {
|
||||
if available.is_empty() {
|
||||
String::from("This index does not have any configured embedders.")
|
||||
@ -72,201 +40,113 @@ use VectorFilterError::*;
|
||||
impl<'a> From<VectorFilterError<'a>> for Error {
|
||||
fn from(err: VectorFilterError<'a>) -> Self {
|
||||
match &err {
|
||||
EmptyFilter => Error::UserError(UserError::InvalidFilter(err.to_string())),
|
||||
InvalidPrefix(token) | MissingFragmentName(token) | LeftoverToken(token) => {
|
||||
token.clone().as_external_error(err).into()
|
||||
}
|
||||
ExclusiveOptions(tokens) => tokens
|
||||
.first()
|
||||
.cloned()
|
||||
.unwrap_or_else(|| Token::from("")) // Should never happen: tokens is never created empty
|
||||
.as_external_error(err)
|
||||
.into(),
|
||||
EmbedderDoesNotExist { embedder: token, .. }
|
||||
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> VectorFilter<'a> {
|
||||
pub(super) fn matches(value: &str) -> bool {
|
||||
value.starts_with("_vectors.") || value == "_vectors"
|
||||
pub(super) fn evaluate(
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
embedder: Option<Token<'_>>,
|
||||
filter: &VectorFilter<'_>,
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let index_embedding_configs = index.embedding_configs();
|
||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||
|
||||
let embedders = match embedder {
|
||||
Some(embedder) => vec![embedder],
|
||||
None => embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect(),
|
||||
};
|
||||
|
||||
let mut docids = embedders
|
||||
.iter()
|
||||
.map(|e| evaluate_inner(rtxn, index, e, &embedding_configs, filter))
|
||||
.union()?;
|
||||
|
||||
if let Some(universe) = universe {
|
||||
docids &= universe;
|
||||
}
|
||||
|
||||
/// Parses a vector filter string.
|
||||
///
|
||||
/// Valid formats:
|
||||
/// - `_vectors`
|
||||
/// - `_vectors.{embedder_name}`
|
||||
/// - `_vectors.{embedder_name}.regenerate`
|
||||
/// - `_vectors.{embedder_name}.userProvided`
|
||||
/// - `_vectors.{embedder_name}.documentTemplate`
|
||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
||||
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
|
||||
let mut split = s.split(".").peekable();
|
||||
|
||||
match split.next() {
|
||||
Some(token) if token.value() == "_vectors" => (),
|
||||
Some(token) => return Err(InvalidPrefix(token)),
|
||||
None => return Err(EmptyFilter),
|
||||
}
|
||||
|
||||
let embedder_name = split.next();
|
||||
|
||||
let mut fragment_tokens = None;
|
||||
if split.peek().map(|t| t.value()) == Some("fragments") {
|
||||
let token = split.next().expect("it was peeked before");
|
||||
let name = split.next().ok_or_else(|| MissingFragmentName(token.clone()))?;
|
||||
|
||||
fragment_tokens = Some((token, name));
|
||||
}
|
||||
|
||||
let mut remaining_tokens = split.collect::<Vec<_>>();
|
||||
|
||||
let mut user_provided_token = None;
|
||||
if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "userProvided") {
|
||||
user_provided_token = Some(remaining_tokens.remove(position));
|
||||
}
|
||||
|
||||
let mut document_template_token = None;
|
||||
if let Some(position) =
|
||||
remaining_tokens.iter().position(|t| t.value() == "documentTemplate")
|
||||
{
|
||||
document_template_token = Some(remaining_tokens.remove(position));
|
||||
}
|
||||
|
||||
let mut regenerate_token = None;
|
||||
if let Some(position) = remaining_tokens.iter().position(|t| t.value() == "regenerate") {
|
||||
regenerate_token = Some(remaining_tokens.remove(position));
|
||||
}
|
||||
|
||||
if !remaining_tokens.is_empty() {
|
||||
return Err(LeftoverToken(remaining_tokens.remove(0)));
|
||||
}
|
||||
|
||||
let inner =
|
||||
match (fragment_tokens, user_provided_token, document_template_token, regenerate_token)
|
||||
{
|
||||
(Some((_token, name)), None, None, None) => VectorFilterInner::Fragment(name),
|
||||
(None, Some(_), None, None) => VectorFilterInner::UserProvided,
|
||||
(None, None, Some(_), None) => VectorFilterInner::DocumentTemplate,
|
||||
(None, None, None, Some(_)) => VectorFilterInner::Regenerate,
|
||||
(None, None, None, None) => VectorFilterInner::None,
|
||||
(a, b, c, d) => {
|
||||
let a = a.map(|(token, _)| token);
|
||||
let present = [a, b, c, d].into_iter().flatten().collect();
|
||||
return Err(ExclusiveOptions(present));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self { inner, embedder: embedder_name })
|
||||
}
|
||||
|
||||
pub(super) fn evaluate(
|
||||
self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let index_embedding_configs = index.embedding_configs();
|
||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||
|
||||
let embedders = match self.embedder {
|
||||
Some(embedder) => vec![embedder],
|
||||
None => {
|
||||
embedding_configs.iter().map(|config| Token::from(config.name.as_str())).collect()
|
||||
}
|
||||
};
|
||||
|
||||
let mut docids = embedders
|
||||
.iter()
|
||||
.map(|e| self.inner.evaluate(rtxn, index, e, &embedding_configs))
|
||||
.union()?;
|
||||
|
||||
if let Some(universe) = universe {
|
||||
docids &= universe;
|
||||
}
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
Ok(docids)
|
||||
}
|
||||
|
||||
impl VectorFilterInner<'_> {
|
||||
fn evaluate(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
embedder: &Token<'_>,
|
||||
embedding_configs: &[IndexEmbeddingConfig],
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let embedder_name = embedder.value();
|
||||
let available_embedders =
|
||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||
fn evaluate_inner(
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
embedder: &Token<'_>,
|
||||
embedding_configs: &[IndexEmbeddingConfig],
|
||||
filter: &VectorFilter<'_>,
|
||||
) -> crate::Result<RoaringBitmap> {
|
||||
let embedder_name = embedder.value();
|
||||
let available_embedders =
|
||||
|| embedding_configs.iter().map(|c| c.name.clone()).collect::<Vec<_>>();
|
||||
|
||||
let embedding_config = embedding_configs
|
||||
.iter()
|
||||
.find(|config| config.name == embedder_name)
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
let embedding_config = embedding_configs
|
||||
.iter()
|
||||
.find(|config| config.name == embedder_name)
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let embedder_info = index
|
||||
.embedding_configs()
|
||||
.embedder_info(rtxn, embedder_name)?
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
let embedder_info = index
|
||||
.embedding_configs()
|
||||
.embedder_info(rtxn, embedder_name)?
|
||||
.ok_or_else(|| EmbedderDoesNotExist { embedder, available: available_embedders() })?;
|
||||
|
||||
let arroy_wrapper = ArroyWrapper::new(
|
||||
index.vector_arroy,
|
||||
embedder_info.embedder_id,
|
||||
embedding_config.config.quantized(),
|
||||
);
|
||||
let arroy_wrapper = ArroyWrapper::new(
|
||||
index.vector_arroy,
|
||||
embedder_info.embedder_id,
|
||||
embedding_config.config.quantized(),
|
||||
);
|
||||
|
||||
let docids = match self {
|
||||
VectorFilterInner::Fragment(fragment) => {
|
||||
let fragment_name = fragment.value();
|
||||
let fragment_config = embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.find(|fragment| fragment.name == fragment_name)
|
||||
.ok_or_else(|| FragmentDoesNotExist {
|
||||
embedder,
|
||||
fragment,
|
||||
available: embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|f| f.name.clone())
|
||||
.collect(),
|
||||
})?;
|
||||
let docids = match filter {
|
||||
VectorFilter::Fragment(fragment) => {
|
||||
let fragment_name = fragment.value();
|
||||
let fragment_config = embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.find(|fragment| fragment.name == fragment_name)
|
||||
.ok_or_else(|| FragmentDoesNotExist {
|
||||
embedder,
|
||||
fragment,
|
||||
available: embedding_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|f| f.name.clone())
|
||||
.collect(),
|
||||
})?;
|
||||
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||
}
|
||||
VectorFilter::DocumentTemplate => {
|
||||
if !embedding_config.fragments.as_slice().is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
VectorFilterInner::DocumentTemplate => {
|
||||
if !embedding_config.fragments.as_slice().is_empty() {
|
||||
return Ok(RoaringBitmap::new());
|
||||
}
|
||||
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents - user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilterInner::UserProvided => {
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilterInner::Regenerate => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||
stats.documents - skip_regenerate
|
||||
}
|
||||
VectorFilterInner::None => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents
|
||||
}
|
||||
};
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents - user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilter::UserProvided => {
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
user_provided_docsids.clone()
|
||||
}
|
||||
VectorFilter::Regenerate => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
let skip_regenerate = embedder_info.embedding_status.skip_regenerate_docids();
|
||||
stats.documents - skip_regenerate
|
||||
}
|
||||
VectorFilter::None => {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents
|
||||
}
|
||||
};
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
Ok(docids)
|
||||
}
|
||||
|
Reference in New Issue
Block a user