mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-27 08:41:00 +00:00
Support not specifying an embedder in the vector filter
This commit is contained in:
@ -6,7 +6,7 @@ use crate::vector::{ArroyStats, ArroyWrapper};
|
|||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
pub(super) struct VectorFilter<'a> {
|
pub(super) struct VectorFilter<'a> {
|
||||||
embedder_name: &'a str,
|
embedder_name: Option<&'a str>,
|
||||||
fragment_name: Option<&'a str>,
|
fragment_name: Option<&'a str>,
|
||||||
user_provided: bool,
|
user_provided: bool,
|
||||||
// TODO: not_user_provided: bool,
|
// TODO: not_user_provided: bool,
|
||||||
@ -14,12 +14,14 @@ pub(super) struct VectorFilter<'a> {
|
|||||||
|
|
||||||
impl<'a> VectorFilter<'a> {
|
impl<'a> VectorFilter<'a> {
|
||||||
pub(super) fn matches(value: &str, op: &Condition) -> bool {
|
pub(super) fn matches(value: &str, op: &Condition) -> bool {
|
||||||
matches!(op, Condition::Exists) && value.starts_with("_vectors.")
|
matches!(op, Condition::Exists) && (value.starts_with("_vectors.") || value == "_vectors")
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a vector filter string.
|
/// Parses a vector filter string.
|
||||||
///
|
///
|
||||||
/// Valid formats:
|
/// Valid formats:
|
||||||
|
/// - `_vectors`
|
||||||
|
/// - `_vectors.userProvided`
|
||||||
/// - `_vectors.{embedder_name}`
|
/// - `_vectors.{embedder_name}`
|
||||||
/// - `_vectors.{embedder_name}.userProvided`
|
/// - `_vectors.{embedder_name}.userProvided`
|
||||||
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
/// - `_vectors.{embedder_name}.fragments.{fragment_name}`
|
||||||
@ -33,11 +35,7 @@ impl<'a> VectorFilter<'a> {
|
|||||||
))));
|
))));
|
||||||
}
|
}
|
||||||
|
|
||||||
let embedder_name = split.next().ok_or_else(|| {
|
let embedder_name = split.next();
|
||||||
Error::UserError(UserError::InvalidFilter(String::from(
|
|
||||||
"Vector filter must contain an embedder name",
|
|
||||||
)))
|
|
||||||
})?;
|
|
||||||
|
|
||||||
let mut fragment_name = None;
|
let mut fragment_name = None;
|
||||||
if split.peek() == Some(&"fragments") {
|
if split.peek() == Some(&"fragments") {
|
||||||
@ -74,44 +72,63 @@ impl<'a> VectorFilter<'a> {
|
|||||||
let index_embedding_configs = index.embedding_configs();
|
let index_embedding_configs = index.embedding_configs();
|
||||||
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
let embedding_configs = index_embedding_configs.embedding_configs(rtxn)?;
|
||||||
|
|
||||||
let Some(embedder_config) =
|
let mut embedders = Vec::new();
|
||||||
embedding_configs.iter().find(|config| config.name == self.embedder_name)
|
if let Some(embedder_name) = self.embedder_name {
|
||||||
else {
|
let Some(embedder_config) =
|
||||||
return Ok(RoaringBitmap::new());
|
embedding_configs.iter().find(|config| config.name == embedder_name)
|
||||||
};
|
|
||||||
let Some(embedder_info) =
|
|
||||||
index_embedding_configs.embedder_info(rtxn, self.embedder_name)?
|
|
||||||
else {
|
|
||||||
return Ok(RoaringBitmap::new());
|
|
||||||
};
|
|
||||||
|
|
||||||
let arroy_wrapper = ArroyWrapper::new(
|
|
||||||
index.vector_arroy,
|
|
||||||
embedder_info.embedder_id,
|
|
||||||
embedder_config.config.quantized(),
|
|
||||||
);
|
|
||||||
|
|
||||||
let mut docids = if let Some(fragment_name) = self.fragment_name {
|
|
||||||
let Some(fragment_config) = embedder_config
|
|
||||||
.fragments
|
|
||||||
.as_slice()
|
|
||||||
.iter()
|
|
||||||
.find(|fragment| fragment.name == fragment_name)
|
|
||||||
else {
|
else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Ok(RoaringBitmap::new());
|
||||||
};
|
};
|
||||||
|
let Some(embedder_info) =
|
||||||
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
index_embedding_configs.embedder_info(rtxn, embedder_name)?
|
||||||
|
else {
|
||||||
|
return Ok(RoaringBitmap::new());
|
||||||
|
};
|
||||||
|
|
||||||
|
embedders.push((embedder_config, embedder_info));
|
||||||
} else {
|
} else {
|
||||||
let mut stats = ArroyStats::default();
|
for embedder_config in embedding_configs.iter() {
|
||||||
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
let Some(embedder_info) =
|
||||||
stats.documents
|
index_embedding_configs.embedder_info(rtxn, &embedder_config.name)?
|
||||||
|
else {
|
||||||
|
continue;
|
||||||
|
};
|
||||||
|
embedders.push((embedder_config, embedder_info));
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let mut docids = RoaringBitmap::new();
|
||||||
|
for (embedder_config, embedder_info) in embedders {
|
||||||
|
let arroy_wrapper = ArroyWrapper::new(
|
||||||
|
index.vector_arroy,
|
||||||
|
embedder_info.embedder_id,
|
||||||
|
embedder_config.config.quantized(),
|
||||||
|
);
|
||||||
|
|
||||||
// FIXME: performance
|
let mut new_docids = if let Some(fragment_name) = self.fragment_name {
|
||||||
if self.user_provided {
|
let Some(fragment_config) = embedder_config
|
||||||
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
.fragments
|
||||||
docids &= user_provided_docsids;
|
.as_slice()
|
||||||
|
.iter()
|
||||||
|
.find(|fragment| fragment.name == fragment_name)
|
||||||
|
else {
|
||||||
|
return Ok(RoaringBitmap::new());
|
||||||
|
};
|
||||||
|
|
||||||
|
arroy_wrapper.items_in_store(rtxn, fragment_config.id, |bitmap| bitmap.clone())?
|
||||||
|
} else {
|
||||||
|
let mut stats = ArroyStats::default();
|
||||||
|
arroy_wrapper.aggregate_stats(rtxn, &mut stats)?;
|
||||||
|
stats.documents
|
||||||
|
};
|
||||||
|
|
||||||
|
// FIXME: performance
|
||||||
|
if self.user_provided {
|
||||||
|
let user_provided_docsids = embedder_info.embedding_status.user_provided_docids();
|
||||||
|
new_docids &= user_provided_docsids;
|
||||||
|
}
|
||||||
|
|
||||||
|
docids |= new_docids;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(universe) = universe {
|
if let Some(universe) = universe {
|
||||||
|
Reference in New Issue
Block a user