From 59316e8d5a7d65d432eb54c3557eeb59d060d941 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Wed, 15 Oct 2025 21:36:26 +0200
Subject: [PATCH] add unit test

---

Notes:
    Adds `degraded_search_and_score_details_vector`: a semantic search on a
    user-provided, 2-dimensional `default` embedder with query vector
    `[1., -1.]`. The ids, global scores and score details are snapshotted
    first with `TimeBudget::max()`, then with `with_stop_after(1)` through
    `with_stop_after(5)`; documents that were not scored before the cutoff
    show up as `Skipped` in the snapshots.

 crates/milli/src/search/new/tests/cutoff.rs | 331 ++++++++++++++++++++
 1 file changed, 331 insertions(+)

diff --git a/crates/milli/src/search/new/tests/cutoff.rs b/crates/milli/src/search/new/tests/cutoff.rs
index 6e8260ef3..6b436c212 100644
--- a/crates/milli/src/search/new/tests/cutoff.rs
+++ b/crates/milli/src/search/new/tests/cutoff.rs
@@ -3,12 +3,17 @@
 //! 2. A test that ensure the filters are affectively applied even with a cutoff of 0
 //! 3. A test that ensure the cutoff works well with the ranking scores
 
+use std::collections::BTreeMap;
+use std::sync::Arc;
 use std::time::Duration;
 
 use meili_snap::snapshot;
 
 use crate::index::tests::TempIndex;
 use crate::score_details::{ScoreDetails, ScoringStrategy};
+use crate::update::Setting;
+use crate::vector::settings::EmbeddingSettings;
+use crate::vector::{Embedder, EmbedderOptions};
 use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};
 
 fn create_index() -> TempIndex {
@@ -493,3 +498,329 @@ fn degraded_search_and_score_details() {
     ]
     "###);
 }
+
+#[test]
+fn degraded_search_and_score_details_vector() {
+    let index = create_index();
+
+    index
+        .add_documents(documents!([
+            {
+                "id": 4,
+                "text": "hella puppo kefir",
+                "_vectors": {
+                    "default": [0.1, 0.1]
+                }
+            },
+            {
+                "id": 3,
+                "text": "hella puppy kefir",
+                "_vectors": {
+                    "default": [-0.1, 0.1]
+                }
+            },
+            {
+                "id": 2,
+                "text": "hello",
+                "_vectors": {
+                    "default": [0.1, -0.1]
+                }
+            },
+            {
+                "id": 1,
+                "text": "hello puppy",
+                "_vectors": {
+                    "default": [-0.1, -0.1]
+                }
+            },
+            {
+                "id": 0,
+                "text": "hello puppy kefir",
+                "_vectors": {
+                    "default": null
+                }
+            },
+        ]))
+        .unwrap();
+
+    index
+        .update_settings(|settings| {
+            let mut embedders = BTreeMap::new();
+            embedders.insert(
+                "default".into(),
+                Setting::Set(EmbeddingSettings {
+                    source: Setting::Set(crate::vector::settings::EmbedderSource::UserProvided),
+                    dimensions: Setting::Set(2),
+                    ..Default::default()
+                }),
+            );
+            settings.set_embedder_settings(embedders);
+            settings.set_vector_store(crate::vector::VectorStoreBackend::Hannoy);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+    let mut search = Search::new(&rtxn, &index);
+
+    let embedder = Arc::new(
+        Embedder::new(
+            EmbedderOptions::UserProvided(crate::vector::embedder::manual::EmbedderOptions {
+                dimensions: 2,
+                distribution: None,
+            }),
+            0,
+        )
+        .unwrap(),
+    );
+
+    search.semantic("default".into(), embedder, false, Some(vec![1., -1.]), None);
+
+    search.limit(4);
+    search.scoring_strategy(ScoringStrategy::Detailed);
+    search.time_budget(TimeBudget::max());
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [2, 0, 3, 1]
+    Scores: 1.0000 0.5000 0.5000 0.0000 
+    Score Details:
+    [
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        1.0,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.0,
+                    ),
+                },
+            ),
+        ],
+    ]
+    "###);
+
+    // Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket.
+    search.time_budget(TimeBudget::max().with_stop_after(1));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [0, 1, 2, 3]
+    Scores: 0.5000 0.0000 0.0000 0.0000 
+    Score Details:
+    [
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Skipped,
+        ],
+        [
+            Skipped,
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    search.time_budget(TimeBudget::max().with_stop_after(2));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [0, 1, 2, 3]
+    Scores: 0.5000 0.0000 0.0000 0.0000 
+    Score Details:
+    [
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.0,
+                    ),
+                },
+            ),
+        ],
+        [
+            Skipped,
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    search.time_budget(TimeBudget::max().with_stop_after(3));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [2, 0, 1, 3]
+    Scores: 1.0000 0.5000 0.0000 0.0000 
+    Score Details:
+    [
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        1.0,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.0,
+                    ),
+                },
+            ),
+        ],
+        [
+            Skipped,
+        ],
+    ]
+    "###);
+
+    search.time_budget(TimeBudget::max().with_stop_after(4));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [2, 0, 3, 1]
+    Scores: 1.0000 0.5000 0.5000 0.0000 
+    Score Details:
+    [
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        1.0,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.0,
+                    ),
+                },
+            ),
+        ],
+    ]
+    "###);
+
+    search.time_budget(TimeBudget::max().with_stop_after(5));
+
+    let result = search.execute().unwrap();
+    snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::<String>(), result.document_scores), @r###"
+    IDs: [2, 0, 3, 1]
+    Scores: 1.0000 0.5000 0.5000 0.0000 
+    Score Details:
+    [
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        1.0,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.5,
+                    ),
+                },
+            ),
+        ],
+        [
+            Vector(
+                Vector {
+                    similarity: Some(
+                        0.0,
+                    ),
+                },
+            ),
+        ],
+    ]
+    "###);
+}