From a376525348d3d046df6c8d32ecc060f9196b6e1e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 17 Nov 2025 13:18:58 +0100 Subject: [PATCH 1/3] Do not skip embedding request for the document that exceeds capacity --- crates/milli/src/vector/session.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/crates/milli/src/vector/session.rs b/crates/milli/src/vector/session.rs index b8905e6ee..d1ae04640 100644 --- a/crates/milli/src/vector/session.rs +++ b/crates/milli/src/vector/session.rs @@ -112,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> { rendered: I, unused_vectors_distribution: &C::ErrorMetadata, ) -> Result<()> { - if self.inputs.len() < self.inputs.capacity() { - self.inputs.push(rendered); - self.metadata.push(metadata); - return Ok(()); + if self.inputs.len() >= self.inputs.capacity() { + self.embed_chunks(unused_vectors_distribution)?; } - - self.embed_chunks(unused_vectors_distribution) + self.inputs.push(rendered); + self.metadata.push(metadata); + Ok(()) } pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result { From a235434910c75f4c15f017e1ac38abf0c295ab96 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 17 Nov 2025 13:52:23 +0100 Subject: [PATCH 2/3] Add test --- crates/meilisearch/tests/search/hybrid.rs | 86 +++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/crates/meilisearch/tests/search/hybrid.rs b/crates/meilisearch/tests/search/hybrid.rs index bcc3ac4d4..5c97c0199 100644 --- a/crates/meilisearch/tests/search/hybrid.rs +++ b/crates/meilisearch/tests/search/hybrid.rs @@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy = Lazy::new(|| { }]) }); +static MANY_DOCS: Lazy = Lazy::new(|| { + json!([ + { + "title": "Shazam!", + "desc": "a Captain Marvel ersatz", + "id": "1", + }, + { + "title": "Captain Planet", + "desc": "He's not part of the Marvel Cinematic Universe", + "id": "2", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "3", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "4", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "5", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "6", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "7", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "8", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "9", + }, + { + "title": "Captain Marvel", + "desc": "a Shazam ersatz", + "id": "10", + }]) +}); + #[actix_rt::test] async fn simple_search() { let server = Server::new_shared(); @@ -449,6 +503,38 @@ async fn simple_search_hf() { snapshot!(response["semanticHitCount"], @"3"); } +#[actix_rt::test] +async fn issue_5976_missing_docs_hf() { + let server = Server::new_shared(); + let index = index_with_documents_hf(server, &MANY_DOCS).await; + let (response, code) = index + .search_post( + json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}), + ) + .await; + snapshot!(code, @"200 OK"); + let are_empty: Vec<_> = response["hits"] + .as_array() + .unwrap() + .into_iter() + .map(|hit| hit["_vectors"]["default"]["embeddings"].as_array().unwrap().is_empty()) + .collect(); + snapshot!(json!(are_empty), @r###" + [ + false, + false, + false, + false, + false, + false, + false, + false, + false, + false + ] + "###); +} + #[actix_rt::test] async fn distribution_shift() { let server = Server::new_shared(); From ddeff5678ff1e363f55d41774226e9b39fa0bc3e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 17 Nov 2025 14:48:40 +0100 Subject: [PATCH 3/3] Clippy happy --- crates/meilisearch/tests/search/hybrid.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/meilisearch/tests/search/hybrid.rs b/crates/meilisearch/tests/search/hybrid.rs index 5c97c0199..ec6bc66ca 100644 --- a/crates/meilisearch/tests/search/hybrid.rs +++ b/crates/meilisearch/tests/search/hybrid.rs @@ -516,7 +516,7 @@ async fn issue_5976_missing_docs_hf() { let are_empty: Vec<_> = response["hits"] .as_array() .unwrap() - .into_iter() + .iter() .map(|hit| hit["_vectors"]["default"]["embeddings"].as_array().unwrap().is_empty()) .collect(); snapshot!(json!(are_empty), @r###"