mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-29 09:39:58 +00:00
fix the search cutoff and add a test
This commit is contained in:
@ -132,7 +132,7 @@ impl<'a> Search<'a> {
|
||||
index: self.index,
|
||||
distribution_shift: self.distribution_shift,
|
||||
embedder_name: self.embedder_name.clone(),
|
||||
time_budget: self.time_budget,
|
||||
time_budget: self.time_budget.clone(),
|
||||
};
|
||||
|
||||
let vector_query = search.vector.take();
|
||||
|
@ -195,7 +195,7 @@ impl<'a> Search<'a> {
|
||||
self.limit,
|
||||
self.distribution_shift,
|
||||
embedder_name,
|
||||
self.time_budget,
|
||||
self.time_budget.clone(),
|
||||
)?,
|
||||
None => execute_search(
|
||||
&mut ctx,
|
||||
@ -211,7 +211,7 @@ impl<'a> Search<'a> {
|
||||
Some(self.words_limit),
|
||||
&mut DefaultSearchLogger,
|
||||
&mut DefaultSearchLogger,
|
||||
self.time_budget,
|
||||
self.time_budget.clone(),
|
||||
)?,
|
||||
};
|
||||
|
||||
|
@ -161,11 +161,21 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
|
||||
while valid_docids.len() < length {
|
||||
if time_budget.exceeded() {
|
||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
maybe_add_to_results!(bucket);
|
||||
loop {
|
||||
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
|
||||
ranking_rule_scores.push(ScoreDetails::Skipped);
|
||||
maybe_add_to_results!(bucket);
|
||||
ranking_rule_scores.pop();
|
||||
|
||||
if cur_ranking_rule_index == 0 {
|
||||
break;
|
||||
}
|
||||
|
||||
back!();
|
||||
}
|
||||
|
||||
return Ok(BucketSortOutput {
|
||||
scores: vec![Default::default(); valid_docids.len()],
|
||||
scores: valid_scores,
|
||||
docids: valid_docids,
|
||||
all_candidates,
|
||||
degraded: true,
|
||||
|
419
milli/src/search/new/tests/cutoff.rs
Normal file
419
milli/src/search/new/tests/cutoff.rs
Normal file
@ -0,0 +1,419 @@
|
||||
//! This module test the search cutoff and ensure a few things:
|
||||
//! 1. A basic test works and mark the search as degraded
|
||||
//! 2. A test that ensure the filters are affectively applied even with a cutoff of 0
|
||||
//! 3. A test that ensure the cutoff works well with the ranking scores
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
use meili_snap::snapshot;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::{Criterion, Filter, Search, TimeBudget};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_filterable_fields(hashset! { S("id") });
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// reverse the ID / insertion order so we see better what was sorted from what got the insertion order ordering
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{
|
||||
"id": 4,
|
||||
"text": "hella puppo kefir",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"text": "hella puppy kefir",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"text": "hello",
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"text": "hello puppy",
|
||||
},
|
||||
{
|
||||
"id": 0,
|
||||
"text": "hello puppy kefir",
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_degraded_search() {
|
||||
let index = create_index();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query("hello puppy kefir");
|
||||
search.limit(3);
|
||||
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
assert!(result.degraded);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn degraded_search_cannot_skip_filter() {
|
||||
let index = create_index();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query("hello puppy kefir");
|
||||
search.limit(100);
|
||||
search.time_budget(TimeBudget::new(Duration::from_millis(0)));
|
||||
let filter_condition = Filter::from_str("id > 2").unwrap().unwrap();
|
||||
search.filter(filter_condition);
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
assert!(result.degraded);
|
||||
snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###"
|
||||
RoaringBitmap<[0, 1]>
|
||||
[0, 1]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn degraded_search_and_score_details() {
|
||||
let index = create_index();
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query("hello puppy kefir");
|
||||
search.limit(4);
|
||||
search.time_budget(TimeBudget::max());
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||
[
|
||||
4,
|
||||
1,
|
||||
0,
|
||||
3,
|
||||
]
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 1,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 2,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket.
|
||||
search.time_budget(TimeBudget::max().with_stop_after(1));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||
[
|
||||
0,
|
||||
1,
|
||||
4,
|
||||
2,
|
||||
]
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// Do TWO loop iterations. The first document should be entirely sorted
|
||||
search.time_budget(TimeBudget::max().with_stop_after(2));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||
[
|
||||
4,
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
]
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// Do THREE loop iterations. The second document should be entirely sorted as well
|
||||
search.time_budget(TimeBudget::max().with_stop_after(3));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||
[
|
||||
4,
|
||||
1,
|
||||
0,
|
||||
2,
|
||||
]
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 1,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Skipped,
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// Do FOUR loop iterations. The third document should be entirely sorted as well
|
||||
// The words bucket have still not progressed thus the last document doesn't have any info yet.
|
||||
search.time_budget(TimeBudget::max().with_stop_after(4));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||
[
|
||||
4,
|
||||
1,
|
||||
0,
|
||||
2,
|
||||
]
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 1,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Skipped,
|
||||
],
|
||||
]
|
||||
"###);
|
||||
|
||||
// After FIVE loop iteration. The words ranking rule gave us a new bucket.
|
||||
// Since we reached the limit we were able to early exit without checking the typo ranking rule.
|
||||
search.time_budget(TimeBudget::max().with_stop_after(5));
|
||||
|
||||
let result = search.execute().unwrap();
|
||||
snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###"
|
||||
[
|
||||
4,
|
||||
1,
|
||||
0,
|
||||
3,
|
||||
]
|
||||
[
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 0,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
Typo(
|
||||
Typo {
|
||||
typo_count: 1,
|
||||
max_typo_count: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 3,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
Words(
|
||||
Words {
|
||||
matching_words: 2,
|
||||
max_matching_words: 3,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
||||
"###);
|
||||
}
|
@ -1,5 +1,6 @@
|
||||
pub mod attribute_fid;
|
||||
pub mod attribute_position;
|
||||
pub mod cutoff;
|
||||
pub mod distinct;
|
||||
pub mod exactness;
|
||||
pub mod geo_sort;
|
||||
|
Reference in New Issue
Block a user