Merge pull request #5995 from meilisearch/fix-embedding-skip

Fix embedding skip
This commit is contained in:
Clément Renault
2025-11-18 10:02:53 +00:00
committed by GitHub
2 changed files with 91 additions and 6 deletions

View File

@@ -137,6 +137,60 @@ static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
}]) }])
}); });
/// Fixture of ten documents with string ids "1" through "10".
///
/// Documents "3" through "10" share the exact same `title` and `desc`,
/// so only the first three entries have distinct text content.
/// Consumed by the `issue_5976_missing_docs_hf` test.
static MANY_DOCS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "4",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "5",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "6",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "7",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "8",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "9",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "10",
}])
});
#[actix_rt::test] #[actix_rt::test]
async fn simple_search() { async fn simple_search() {
let server = Server::new_shared(); let server = Server::new_shared();
@@ -449,6 +503,38 @@ async fn simple_search_hf() {
snapshot!(response["semanticHitCount"], @"3"); snapshot!(response["semanticHitCount"], @"3");
} }
/// Test named after issue 5976: indexes `MANY_DOCS` through the `hf` helper,
/// runs a search with `semanticRatio` 1.0 and `retrieveVectors` enabled, and
/// snapshots, for every hit, whether its `_vectors.default.embeddings` array
/// is empty. The expected snapshot is ten `false` entries, i.e. no returned
/// hit is missing its embeddings.
#[actix_rt::test]
async fn issue_5976_missing_docs_hf() {
    let server = Server::new_shared();
    let index = index_with_documents_hf(server, &MANY_DOCS).await;

    // Build the search payload up front so the request itself stays on one line.
    let query = json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
    let (response, code) = index.search_post(query).await;
    snapshot!(code, @"200 OK");

    // One flag per hit, in hit order: `true` when the hit carries no embeddings.
    let hits = response["hits"].as_array().unwrap();
    let mut embeddings_missing = Vec::with_capacity(hits.len());
    for hit in hits {
        let embeddings = hit["_vectors"]["default"]["embeddings"].as_array().unwrap();
        embeddings_missing.push(embeddings.is_empty());
    }

    snapshot!(json!(embeddings_missing), @r###"
[
false,
false,
false,
false,
false,
false,
false,
false,
false,
false
]
"###);
}
#[actix_rt::test] #[actix_rt::test]
async fn distribution_shift() { async fn distribution_shift() {
let server = Server::new_shared(); let server = Server::new_shared();

View File

@@ -112,13 +112,12 @@ impl<'doc, C: OnEmbed<'doc>, I: Input> EmbedSession<'doc, C, I> {
rendered: I, rendered: I,
unused_vectors_distribution: &C::ErrorMetadata, unused_vectors_distribution: &C::ErrorMetadata,
) -> Result<()> { ) -> Result<()> {
if self.inputs.len() < self.inputs.capacity() { if self.inputs.len() >= self.inputs.capacity() {
self.inputs.push(rendered); self.embed_chunks(unused_vectors_distribution)?;
self.metadata.push(metadata);
return Ok(());
} }
self.inputs.push(rendered);
self.embed_chunks(unused_vectors_distribution) self.metadata.push(metadata);
Ok(())
} }
pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<C> { pub fn drain(mut self, unused_vectors_distribution: &C::ErrorMetadata) -> Result<C> {