diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index 3b26ab4ee..d0f388220 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -989,6 +989,85 @@ async fn vector_filter_specific_fragment_user_provided() { "#); } +#[actix_rt::test] +async fn vector_filter_document_template_but_fragments_used() { + let index = crate::vector::shared_index_for_fragments().await; + + let (value, _code) = index + .search_post(json!({ + "filter": "_vectors.rest.documentTemplate EXISTS", + "attributesToRetrieve": ["name"] + })) + .await; + snapshot!(value, @r#" + { + "hits": [], + "query": "", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "#); +} + +#[actix_rt::test] +async fn vector_filter_document_template() { + let (_mock, setting) = crate::vector::create_mock().await; + let server = crate::vector::get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + server.wait_task(response.uid()).await.succeeded(); + + let documents = json!([ + {"id": 0, "name": "kefir"}, + {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, + {"id": 2, "name": "intel"}, + {"id": 3, "name": "iko" } + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + index.wait_task(value.uid()).await.succeeded(); + + let (value, _code) = index + .search_post(json!({ + "filter": "_vectors.rest.documentTemplate EXISTS", + "attributesToRetrieve": ["name"] + })) + .await; + snapshot!(value, @r#" + { + "hits": [ + { + "name": "kefir" + }, + { + "name": "echo" + }, + { + "name": "intel" + }, + { + "name": "iko" + } + ], + "query": "", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 4 + } + "#); +} + 
#[actix_rt::test] async fn vector_filter_negation() { let index = crate::vector::shared_index_for_fragments().await; diff --git a/crates/meilisearch/tests/vector/mod.rs b/crates/meilisearch/tests/vector/mod.rs index 9ba37cae3..8851d029e 100644 --- a/crates/meilisearch/tests/vector/mod.rs +++ b/crates/meilisearch/tests/vector/mod.rs @@ -14,9 +14,9 @@ use meilisearch::option::MaxThreads; use crate::common::index::Index; use crate::common::{default_settings, GetAllDocumentsOptions, Server}; use crate::json; -pub use fragments::shared_index_for_fragments; +pub use {fragments::shared_index_for_fragments, rest::create_mock}; -async fn get_server_vector() -> Server { +pub async fn get_server_vector() -> Server { Server::new().await } diff --git a/crates/meilisearch/tests/vector/rest.rs b/crates/meilisearch/tests/vector/rest.rs index 974341cd0..dae9e9139 100644 --- a/crates/meilisearch/tests/vector/rest.rs +++ b/crates/meilisearch/tests/vector/rest.rs @@ -12,7 +12,7 @@ use crate::common::Value; use crate::json; use crate::vector::{get_server_vector, GetAllDocumentsOptions}; -async fn create_mock() -> (&'static MockServer, Value) { +pub async fn create_mock() -> (&'static MockServer, Value) { let mock_server = Box::leak(Box::new(MockServer::start().await)); let text_to_embedding: BTreeMap<_, _> = vec![ diff --git a/crates/milli/src/search/facet/filter_vector.rs b/crates/milli/src/search/facet/filter_vector.rs index 0b9cad702..e3ec698f5 100644 --- a/crates/milli/src/search/facet/filter_vector.rs +++ b/crates/milli/src/search/facet/filter_vector.rs @@ -8,6 +8,7 @@ use crate::Index; pub(super) struct VectorFilter<'a> { embedder_token: Option>, fragment_token: Option>, + document_template: bool, user_provided: bool, } @@ -17,6 +18,7 @@ pub enum VectorFilterError<'a> { InvalidPrefix(Token<'a>), MissingFragmentName(Token<'a>), UserProvidedWithFragment(Token<'a>), + DocumentTemplateWithFragment(Token<'a>), LeftoverToken(Token<'a>), EmbedderDoesNotExist { embedder: &'a Token<'a>, 
@@ -52,6 +54,9 @@ impl std::fmt::Display for VectorFilterError<'_> { UserProvidedWithFragment(_token) => { write!(f, "Vector filter cannot specify both a fragment name and userProvided.") } + DocumentTemplateWithFragment(_token) => { + write!(f, "Vector filter cannot specify both a fragment name and documentTemplate.") + } LeftoverToken(token) => { write!(f, "Vector filter has leftover token: `{}`.", token.value()) } @@ -105,6 +110,7 @@ impl<'a> From> for Error { InvalidPrefix(token) | MissingFragmentName(token) | UserProvidedWithFragment(token) + | DocumentTemplateWithFragment(token) | LeftoverToken(token) => token.clone().as_external_error(err).into(), EmbedderDoesNotExist { embedder: token, .. } | FragmentDoesNotExist { fragment: token, .. } => token.as_external_error(err).into(), @@ -123,6 +129,8 @@ impl<'a> VectorFilter<'a> { /// - `_vectors` /// - `_vectors.{embedder_name}` /// - `_vectors.{embedder_name}.userProvided` + /// - `_vectors.{embedder_name}.documentTemplate` + /// - `_vectors.{embedder_name}.userProvided.documentTemplate` /// - `_vectors.{embedder_name}.fragments.{fragment_name}` pub(super) fn parse(s: &'a Token<'a>) -> Result> { let mut split = s.split(".").peekable(); @@ -149,10 +157,22 @@ impl<'a> VectorFilter<'a> { user_provided_token = split.next(); } + let mut document_template_token = None; + if split.peek().map(|t| t.value()) == Some("documentTemplate") + || split.peek().map(|t| t.value()) == Some("document_template") + { + document_template_token = split.next(); + } + if let (Some(_), Some(user_provided_token)) = (&fragment_name, &user_provided_token) { return Err(UserProvidedWithFragment(user_provided_token.clone()))?; } + if let (Some(_), Some(document_template_token)) = (&fragment_name, &document_template_token) + { + return Err(DocumentTemplateWithFragment(document_template_token.clone()))?; + } + if let Some(next) = split.next() { return Err(LeftoverToken(next))?; } @@ -161,6 +181,7 @@ impl<'a> VectorFilter<'a> { embedder_token: 
embedder_name, fragment_token: fragment_name, user_provided: user_provided_token.is_some(), + document_template: document_template_token.is_some(), }) } @@ -176,7 +197,8 @@ impl<'a> VectorFilter<'a> { let mut embedders = Vec::new(); if let Some(embedder_token) = &self.embedder_token { let embedder_name = embedder_token.value(); - let Some(embedder_config) = + + let Some(embedding_config) = embedding_configs.iter().find(|config| config.name == embedder_name) else { return Err(EmbedderDoesNotExist { @@ -184,6 +206,7 @@ impl<'a> VectorFilter<'a> { available: embedding_configs.iter().map(|c| c.name.clone()).collect(), })?; }; + let Some(embedder_info) = index_embedding_configs.embedder_info(rtxn, embedder_name)? else { return Err(EmbedderDoesNotExist { @@ -192,7 +215,11 @@ impl<'a> VectorFilter<'a> { })?; }; - embedders.push((embedder_config, embedder_info)); + if self.document_template && !embedding_config.fragments.as_slice().is_empty() { + return Ok(RoaringBitmap::new()); + } + + embedders.push((embedding_config, embedder_info)); } else { for embedder_config in embedding_configs.iter() { let Some(embedder_info) = @@ -205,16 +232,16 @@ impl<'a> VectorFilter<'a> { }; let mut docids = RoaringBitmap::new(); - for (embedder_config, embedder_info) in embedders { + for (embedding_config, embedder_info) in embedders { let arroy_wrapper = ArroyWrapper::new( index.vector_arroy, embedder_info.embedder_id, - embedder_config.config.quantized(), + embedding_config.config.quantized(), ); docids |= if let Some(fragment_token) = &self.fragment_token { let fragment_name = fragment_token.value(); - let Some(fragment_config) = embedder_config + let Some(fragment_config) = embedding_config .fragments .as_slice() .iter() @@ -226,7 +253,7 @@ impl<'a> VectorFilter<'a> { .as_ref() .expect("there can't be a fragment without an embedder"), fragment: fragment_token, - available: embedder_config + available: embedding_config .fragments .as_slice() .iter()