Implement a documentTemplate filter

This commit is contained in:
Mubelotix
2025-07-09 18:03:32 +02:00
parent 8adf6141e0
commit 39f808714d
4 changed files with 115 additions and 9 deletions

View File

@ -989,6 +989,85 @@ async fn vector_filter_specific_fragment_user_provided() {
"#); "#);
} }
/// Searching with a `documentTemplate` vector filter on the shared fragments
/// index is expected to yield no hits; the inline snapshot pins the empty
/// result set.
#[actix_rt::test]
async fn vector_filter_document_template_but_fragments_used() {
    let fragments_index = crate::vector::shared_index_for_fragments().await;

    // Build the request up front so the search call itself stays on one line.
    let search_body = json!({
        "filter": "_vectors.rest.documentTemplate EXISTS",
        "attributesToRetrieve": ["name"]
    });
    let (search_response, _status) = fragments_index.search_post(search_body).await;

    snapshot!(search_response, @r#"
{
"hits": [],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 0
}
"#);
}
/// Configures a `rest` embedder from the settings returned by `create_mock`,
/// indexes four documents (one of which carries its own `rest` vectors), and
/// checks that a `documentTemplate EXISTS` filter returns all four of them.
#[actix_rt::test]
async fn vector_filter_document_template() {
    let (_mock, embedder_settings) = crate::vector::create_mock().await;
    let server = crate::vector::get_server_vector().await;
    let index = server.index("doggo");

    // Step 1: register the embedder under the name `rest` and wait for the task.
    let settings_payload = json!({
        "embedders": {
            "rest": embedder_settings,
        },
    });
    let (settings_task, settings_status) = index.update_settings(settings_payload).await;
    snapshot!(settings_status, @"202 Accepted");
    server.wait_task(settings_task.uid()).await.succeeded();

    // Step 2: add the documents; only `echo` ships explicit vectors.
    let dogs = json!([
        {"id": 0, "name": "kefir"},
        {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }},
        {"id": 2, "name": "intel"},
        {"id": 3, "name": "iko" }
    ]);
    let (addition_task, addition_status) = index.add_documents(dogs, None).await;
    snapshot!(addition_status, @"202 Accepted");
    index.wait_task(addition_task.uid()).await.succeeded();

    // Step 3: run the filtered search and compare against the pinned response.
    let search_body = json!({
        "filter": "_vectors.rest.documentTemplate EXISTS",
        "attributesToRetrieve": ["name"]
    });
    let (search_response, _status) = index.search_post(search_body).await;

    snapshot!(search_response, @r#"
{
"hits": [
{
"name": "kefir"
},
{
"name": "echo"
},
{
"name": "intel"
},
{
"name": "iko"
}
],
"query": "",
"processingTimeMs": "[duration]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 4
}
"#);
}
#[actix_rt::test] #[actix_rt::test]
async fn vector_filter_negation() { async fn vector_filter_negation() {
let index = crate::vector::shared_index_for_fragments().await; let index = crate::vector::shared_index_for_fragments().await;

View File

@ -14,9 +14,9 @@ use meilisearch::option::MaxThreads;
use crate::common::index::Index; use crate::common::index::Index;
use crate::common::{default_settings, GetAllDocumentsOptions, Server}; use crate::common::{default_settings, GetAllDocumentsOptions, Server};
use crate::json; use crate::json;
pub use fragments::shared_index_for_fragments; pub use {fragments::shared_index_for_fragments, rest::create_mock};
async fn get_server_vector() -> Server { pub async fn get_server_vector() -> Server {
Server::new().await Server::new().await
} }

View File

@ -12,7 +12,7 @@ use crate::common::Value;
use crate::json; use crate::json;
use crate::vector::{get_server_vector, GetAllDocumentsOptions}; use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (&'static MockServer, Value) { pub async fn create_mock() -> (&'static MockServer, Value) {
let mock_server = Box::leak(Box::new(MockServer::start().await)); let mock_server = Box::leak(Box::new(MockServer::start().await));
let text_to_embedding: BTreeMap<_, _> = vec![ let text_to_embedding: BTreeMap<_, _> = vec![

View File

@ -8,6 +8,7 @@ use crate::Index;
pub(super) struct VectorFilter<'a> { pub(super) struct VectorFilter<'a> {
embedder_token: Option<Token<'a>>, embedder_token: Option<Token<'a>>,
fragment_token: Option<Token<'a>>, fragment_token: Option<Token<'a>>,
document_template: bool,
user_provided: bool, user_provided: bool,
} }
@ -17,6 +18,7 @@ pub enum VectorFilterError<'a> {
InvalidPrefix(Token<'a>), InvalidPrefix(Token<'a>),
MissingFragmentName(Token<'a>), MissingFragmentName(Token<'a>),
UserProvidedWithFragment(Token<'a>), UserProvidedWithFragment(Token<'a>),
DocumentTemplateWithFragment(Token<'a>),
LeftoverToken(Token<'a>), LeftoverToken(Token<'a>),
EmbedderDoesNotExist { EmbedderDoesNotExist {
embedder: &'a Token<'a>, embedder: &'a Token<'a>,
@ -52,6 +54,9 @@ impl std::fmt::Display for VectorFilterError<'_> {
UserProvidedWithFragment(_token) => { UserProvidedWithFragment(_token) => {
write!(f, "Vector filter cannot specify both a fragment name and userProvided.") write!(f, "Vector filter cannot specify both a fragment name and userProvided.")
} }
DocumentTemplateWithFragment(_token) => {
write!(f, "Vector filter cannot specify both a fragment name and documentTemplate.")
}
LeftoverToken(token) => { LeftoverToken(token) => {
write!(f, "Vector filter has leftover token: `{}`.", token.value()) write!(f, "Vector filter has leftover token: `{}`.", token.value())
} }
@ -105,6 +110,7 @@ impl<'a> From<VectorFilterError<'a>> for Error {
InvalidPrefix(token) InvalidPrefix(token)
| MissingFragmentName(token) | MissingFragmentName(token)
| UserProvidedWithFragment(token) | UserProvidedWithFragment(token)
| DocumentTemplateWithFragment(token)
| LeftoverToken(token) => token.clone().as_external_error(err).into(), | LeftoverToken(token) => token.clone().as_external_error(err).into(),
EmbedderDoesNotExist { embedder: token, .. } EmbedderDoesNotExist { embedder: token, .. }
| FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(), | FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(),
@ -123,6 +129,8 @@ impl<'a> VectorFilter<'a> {
/// - `_vectors` /// - `_vectors`
/// - `_vectors.{embedder_name}` /// - `_vectors.{embedder_name}`
/// - `_vectors.{embedder_name}.userProvided` /// - `_vectors.{embedder_name}.userProvided`
/// - `_vectors.{embedder_name}.documentTemplate`
/// - `_vectors.{embedder_name}.userProvided.documentTemplate`
/// - `_vectors.{embedder_name}.fragments.{fragment_name}` /// - `_vectors.{embedder_name}.fragments.{fragment_name}`
pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> { pub(super) fn parse(s: &'a Token<'a>) -> Result<Self, VectorFilterError<'a>> {
let mut split = s.split(".").peekable(); let mut split = s.split(".").peekable();
@ -149,10 +157,22 @@ impl<'a> VectorFilter<'a> {
user_provided_token = split.next(); user_provided_token = split.next();
} }
let mut document_template_token = None;
if split.peek().map(|t| t.value()) == Some("documentTemplate")
|| split.peek().map(|t| t.value()) == Some("document_template")
{
document_template_token = split.next();
}
if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) { if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) {
return Err(UserProvidedWithFragment(user_provided_token.clone()))?; return Err(UserProvidedWithFragment(user_provided_token.clone()))?;
} }
if let (Some(_), Some(document_template_token)) = (&fragment_name, &document_template_token)
{
return Err(DocumentTemplateWithFragment(document_template_token.clone()))?;
}
if let Some(next) = split.next() { if let Some(next) = split.next() {
return Err(LeftoverToken(next))?; return Err(LeftoverToken(next))?;
} }
@ -161,6 +181,7 @@ impl<'a> VectorFilter<'a> {
embedder_token: embedder_name, embedder_token: embedder_name,
fragment_token: fragment_name, fragment_token: fragment_name,
user_provided: user_provided_token.is_some(), user_provided: user_provided_token.is_some(),
document_template: document_template_token.is_some(),
}) })
} }
@ -176,7 +197,8 @@ impl<'a> VectorFilter<'a> {
let mut embedders = Vec::new(); let mut embedders = Vec::new();
if let Some(embedder_token) = &self.embedder_token { if let Some(embedder_token) = &self.embedder_token {
let embedder_name = embedder_token.value(); let embedder_name = embedder_token.value();
let Some(embedder_config) =
let Some(embedding_config) =
embedding_configs.iter().find(|config| config.name == embedder_name) embedding_configs.iter().find(|config| config.name == embedder_name)
else { else {
return Err(EmbedderDoesNotExist { return Err(EmbedderDoesNotExist {
@ -184,6 +206,7 @@ impl<'a> VectorFilter<'a> {
available: embedding_configs.iter().map(|c| c.name.clone()).collect(), available: embedding_configs.iter().map(|c| c.name.clone()).collect(),
})?; })?;
}; };
let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)? let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)?
else { else {
return Err(EmbedderDoesNotExist { return Err(EmbedderDoesNotExist {
@ -192,7 +215,11 @@ impl<'a> VectorFilter<'a> {
})?; })?;
}; };
embedders.push((embedder_config, embedder_info)); if self.document_template && !embedding_config.fragments.as_slice().is_empty() {
return Ok(RoaringBitmap::new());
}
embedders.push((embedding_config, embedder_info));
} else { } else {
for embedder_config in embedding_configs.iter() { for embedder_config in embedding_configs.iter() {
let Some(embedder_info) = let Some(embedder_info) =
@ -205,16 +232,16 @@ impl<'a> VectorFilter<'a> {
}; };
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (embedder_config, embedder_info) in embedders { for (embedding_config, embedder_info) in embedders {
let arroy_wrapper = ArroyWrapper::new( let arroy_wrapper = ArroyWrapper::new(
index.vector_arroy, index.vector_arroy,
embedder_info.embedder_id, embedder_info.embedder_id,
embedder_config.config.quantized(), embedding_config.config.quantized(),
); );
docids |= if let Some(fragment_token) = &self.fragment_token { docids |= if let Some(fragment_token) = &self.fragment_token {
let fragment_name = fragment_token.value(); let fragment_name = fragment_token.value();
let Some(fragment_config) = embedder_config let Some(fragment_config) = embedding_config
.fragments .fragments
.as_slice() .as_slice()
.iter() .iter()
@ -226,7 +253,7 @@ impl<'a> VectorFilter<'a> {
.as_ref() .as_ref()
.expect("there can't be a fragment without an embedder"), .expect("there can't be a fragment without an embedder"),
fragment: fragment_token, fragment: fragment_token,
available: embedder_config available: embedding_config
.fragments .fragments
.as_slice() .as_slice()
.iter() .iter()