// Patch summary. This text sits before the first `diff --git` header, where patch
// tools are expected to skip it as leading non-patch text (confirm with your tooling).
//
// crates/meilisearch/tests/search/hybrid.rs
//   * Adds the `MANY_DOCS` fixture: ten documents with ids "1" through "10".
//   * Adds the test `issue_5976_missing_docs_hf`. It indexes `MANY_DOCS` through
//     `index_with_documents_hf`, runs a search with `semanticRatio` 1.0 and
//     `retrieveVectors: true`, expects `200 OK`, and snapshots that each of the ten
//     returned hits has a non-empty `_vectors.default.embeddings` array.
//
// crates/milli/src/vector/session.rs
//   * Touches the `EmbedSession` method whose signature ends at `) -> Result<()> {`
//     (its name lies above the hunk and is not visible here).
//     Before: `rendered` and `metadata` were pushed only while
//     `inputs.len() < inputs.capacity()`; otherwise the method returned the result of
//     `embed_chunks` without pushing the current item.
//     After: when the buffer is at capacity, `embed_chunks` runs first (errors
//     propagate via `?`), and the current item is then always pushed.
//
// NOTE(review): `Lazy = Lazy::new` and `-> Result {` below look like they lost a
// generic argument (`<...>`) when this patch text was extracted. They are kept exactly
// as found; confirm against the repository before applying.
// NOTE(review): the original text had every newline and indent collapsed to a single
// space. Line breaks below are checked against the hunk line counts; indentation
// within each line is reconstructed and should be verified against the target files.
diff --git a/crates/meilisearch/tests/search/hybrid.rs b/crates/meilisearch/tests/search/hybrid.rs
index bcc3ac4d4..ec6bc66ca 100644
--- a/crates/meilisearch/tests/search/hybrid.rs
+++ b/crates/meilisearch/tests/search/hybrid.rs
@@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy = Lazy::new(|| {
     }])
 });
 
+static MANY_DOCS: Lazy = Lazy::new(|| {
+    json!([
+    {
+        "title": "Shazam!",
+        "desc": "a Captain Marvel ersatz",
+        "id": "1",
+    },
+    {
+        "title": "Captain Planet",
+        "desc": "He's not part of the Marvel Cinematic Universe",
+        "id": "2",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "3",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "4",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "5",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "6",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "7",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "8",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "9",
+    },
+    {
+        "title": "Captain Marvel",
+        "desc": "a Shazam ersatz",
+        "id": "10",
+    }])
+});
+
 #[actix_rt::test]
 async fn simple_search() {
     let server = Server::new_shared();
@@ -449,6 +503,38 @@ async fn simple_search_hf() {
     snapshot!(response["semanticHitCount"], @"3");
 }
 
+#[actix_rt::test]
+async fn issue_5976_missing_docs_hf() {
+    let server = Server::new_shared();
+    let index = index_with_documents_hf(server, &MANY_DOCS).await;
+    let (response, code) = index
+        .search_post(
+            json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true}),
+        )
+        .await;
+    snapshot!(code, @"200 OK");
+    let are_empty: Vec<_> = response["hits"]
+        .as_array()
+        .unwrap()
+        .iter()
+        .map(|hit| hit["_vectors"]["default"]["embeddings"].as_array().unwrap().is_empty())
+        .collect();
+    snapshot!(json!(are_empty), @r###"
+    [
+      false,
+      false,
+      false,
+      false,
+      false,
+      false,
+      false,
+      false,
+      false,
+      false
+    ]
+    "###);
+}
+
 #[actix_rt::test]
 async fn distribution_shift() {
     let server = Server::new_shared();
diff --git a/crates/milli/src/vector/session.rs b/crates/milli/src/vector/session.rs
index b8905e6ee..d1ae04640 100644
--- a/crates/milli/src/vector/session.rs
+++ b/crates/milli/src/vector/session.rs
@@ -112,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
         rendered: I,
         unused_vectors_distribution: &C::ErrorMetadata,
     ) -> Result<()> {
-        if self.inputs.len() < self.inputs.capacity() {
-            self.inputs.push(rendered);
-            self.metadata.push(metadata);
-            return Ok(());
+        if self.inputs.len() >= self.inputs.capacity() {
+            self.embed_chunks(unused_vectors_distribution)?;
         }
-
-        self.embed_chunks(unused_vectors_distribution)
+        self.inputs.push(rendered);
+        self.metadata.push(metadata);
+        Ok(())
     }
 
     pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result {