mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Implement core filter logic
This commit is contained in:
@ -1776,7 +1776,7 @@ impl Index {
|
||||
embedder_info.embedder_id,
|
||||
config.config.quantized(),
|
||||
);
|
||||
let embeddings = reader.item_vectors(rtxn, docid)?;
|
||||
let embeddings = reader.item_vectors(rtxn, docid)?; // MARKER
|
||||
res.insert(
|
||||
config.name.to_owned(),
|
||||
(embeddings, embedder_info.embedding_status.must_regenerate(docid)),
|
||||
|
@ -10,7 +10,7 @@ use memchr::memmem::Finder;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use serde_json::Value;
|
||||
|
||||
use super::facet_range_search;
|
||||
use super::{facet_range_search, filter_vector::VectorFilter};
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
|
||||
@ -234,8 +234,11 @@ impl<'a> Filter<'a> {
|
||||
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
|
||||
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
|
||||
let filterable_attributes_rules = dbg!(index.filterable_attributes_rules(rtxn)?);
|
||||
|
||||
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
|
||||
println!("{fid:?}");
|
||||
|
||||
let attribute = fid.value();
|
||||
if matching_features(attribute, &filterable_attributes_rules)
|
||||
.is_some_and(|(_, features)| features.is_filterable())
|
||||
@ -542,7 +545,13 @@ impl<'a> Filter<'a> {
|
||||
.union()
|
||||
}
|
||||
FilterCondition::Condition { fid, op } => {
|
||||
let Some(field_id) = field_ids_map.id(fid.value()) else {
|
||||
let value = fid.value();
|
||||
if VectorFilter::matches(value, op) {
|
||||
let vector_filter = VectorFilter::parse(value)?;
|
||||
return vector_filter.evaluate(rtxn, index, universe);
|
||||
}
|
||||
|
||||
let Some(field_id) = field_ids_map.id(value) else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
let Some((rule_index, features)) =
|
||||
|
123
crates/milli/src/search/facet/filter_vector.rs
Normal file
123
crates/milli/src/search/facet/filter_vector.rs
Normal file
@ -0,0 +1,123 @@
|
||||
use filter_parser::Condition;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::error::{Error, UserError};
|
||||
use crate::vector::{ArroyStats, ArroyWrapper};
|
||||
use crate::{Index, Result};
|
||||
|
||||
pub(super) struct VectorFilter<'a> {
|
||||
embedder_name: &'a str,
|
||||
fragment_name: Option<&'a str>,
|
||||
user_provided: bool,
|
||||
// TODO: not_user_provided: bool,
|
||||
}
|
||||
|
||||
impl<'a> VectorFilter<'a> {
|
||||
pub(super) fn matches(value: &str, op: &Condition) -> bool {
|
||||
matches!(op, Condition::Exists) && value.starts_with("_vectors.")
|
||||
}
|
||||
|
||||
/// Parses a vector filter string.
|
||||
///
|
||||
/// Valid formats:
|
||||
/// - `_vectors.{embedder_name}`
|
||||
/// - `_vectors.{embedder_name}.userProvided`
|
||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}.userProvided`
|
||||
pub(super) fn parse(s: &'a str) -> Result<Self> {
|
||||
let mut split = s.split('.').peekable();
|
||||
|
||||
if split.next() != Some("_vectors") {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(String::from(
|
||||
"Vector filter must start with '_vectors'",
|
||||
))));
|
||||
}
|
||||
|
||||
let embedder_name = split.next().ok_or_else(|| {
|
||||
Error::UserError(UserError::InvalidFilter(String::from(
|
||||
"Vector filter must contain an embedder name",
|
||||
)))
|
||||
})?;
|
||||
|
||||
let mut fragment_name = None;
|
||||
if split.peek() == Some(&"fragments") {
|
||||
split.next();
|
||||
|
||||
fragment_name = Some(split.next().ok_or_else(|| {
|
||||
Error::UserError(UserError::InvalidFilter(
|
||||
String::from("Vector filter is inconsistent: either specify a fragment name or remove the 'fragments' part"),
|
||||
))
|
||||
})?);
|
||||
}
|
||||
|
||||
let mut user_provided = false;
|
||||
if split.peek() == Some(&"userProvided") || split.peek() == Some(&"user_provided") {
|
||||
split.next();
|
||||
user_provided = true;
|
||||
}
|
||||
|
||||
if let Some(next) = split.next() {
|
||||
return Err(Error::UserError(UserError::InvalidFilter(format!(
|
||||
"Unexpected part in vector filter: '{next}'"
|
||||
))));
|
||||
}
|
||||
|
||||
Ok(Self { embedder_name, fragment_name, user_provided })
|
||||
}
|
||||
|
||||
pub(super) fn evaluate(
|
||||
&self,
|
||||
rtxn: &heed::RoTxn<'_>,
|
||||
index: &Index,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let index_embedding_configs = index.embedding_configs();
|
||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||
|
||||
let Some(embedder_config) =
|
||||
embedding_configs.iter().find(|config| config.name == self.embedder_name)
|
||||
else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
let Some(embedder_info) =
|
||||
index_embedding_configs.embedder_info(rtxn, self.embedder_name)?
|
||||
else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
|
||||
let arroy_wrapper = ArroyWrapper::new(
|
||||
index.vector_arroy,
|
||||
embedder_info.embedder_id,
|
||||
embedder_config.config.quantized(),
|
||||
);
|
||||
|
||||
let mut docids = if let Some(fragment_name) = self.fragment_name {
|
||||
let Some(fragment_config) = embedder_config
|
||||
.fragments
|
||||
.as_slice()
|
||||
.iter()
|
||||
.find(|fragment| fragment.name == fragment_name)
|
||||
else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
};
|
||||
|
||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||
} else {
|
||||
let mut stats = ArroyStats::default();
|
||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||
stats.documents
|
||||
};
|
||||
|
||||
// FIXME: performance
|
||||
if self.user_provided {
|
||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||
docids &= user_provided_docsids;
|
||||
}
|
||||
|
||||
if let Some(universe) = universe {
|
||||
docids &= universe;
|
||||
}
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
@ -17,6 +17,7 @@ mod facet_range_search;
|
||||
mod facet_sort_ascending;
|
||||
mod facet_sort_descending;
|
||||
mod filter;
|
||||
mod filter_vector;
|
||||
mod search;
|
||||
|
||||
fn facet_extreme_value<'t>(
|
||||
|
@ -966,7 +966,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
// some user provided, remove only the ids that are not user provided
|
||||
let to_delete = arroy.items_in_store(wtxn, *fragment_id, |items| {
|
||||
items - infos.embedding_status.user_provided_docids()
|
||||
})?;
|
||||
})?; // MARKER
|
||||
|
||||
for to_delete in to_delete {
|
||||
arroy.del_item_in_store(wtxn, to_delete, *fragment_id, dimensions)?;
|
||||
|
Reference in New Issue
Block a user