mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-09-19 03:06:26 +00:00
Merge #4693
4693: Introduce distinct attributes at search time r=irevoire a=Kerollmops This PR fixes #4611. ### To Do - [x] Remove the `distinguishableAttributes` settings (not even a commit about that). - [x] Use the `filterableAttributes` to be able to use the `distinct` parameter at search. - [x] Work on the errors and make tests. Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
|
||||
query: &Q,
|
||||
distinct: Option<&str>,
|
||||
universe: &RoaringBitmap,
|
||||
from: usize,
|
||||
length: usize,
|
||||
@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
logger.initial_universe(universe);
|
||||
|
||||
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? {
|
||||
let distinct_field = match distinct {
|
||||
Some(distinct) => Some(distinct),
|
||||
None => ctx.index.distinct_field(ctx.txn)?,
|
||||
};
|
||||
|
||||
let distinct_fid = if let Some(field) = distinct_field {
|
||||
ctx.index.fields_ids_map(ctx.txn)?.id(field)
|
||||
} else {
|
||||
None
|
||||
|
@ -516,6 +516,7 @@ mod tests {
|
||||
false,
|
||||
universe,
|
||||
&None,
|
||||
&None,
|
||||
crate::search::new::GeoSortStrategy::default(),
|
||||
0,
|
||||
100,
|
||||
|
@ -568,6 +568,7 @@ pub fn execute_vector_search(
|
||||
scoring_strategy: ScoringStrategy,
|
||||
universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
from: usize,
|
||||
length: usize,
|
||||
@ -598,6 +599,7 @@ pub fn execute_vector_search(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&PlaceholderQuery,
|
||||
distinct.as_deref(),
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
@ -627,6 +629,7 @@ pub fn execute_search(
|
||||
exhaustive_number_hits: bool,
|
||||
mut universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
from: usize,
|
||||
length: usize,
|
||||
@ -717,6 +720,7 @@ pub fn execute_search(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&graph,
|
||||
distinct.as_deref(),
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
@ -732,6 +736,7 @@ pub fn execute_search(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&PlaceholderQuery,
|
||||
distinct.as_deref(),
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
@ -748,7 +753,12 @@ pub fn execute_search(
|
||||
// The candidates is the universe unless the exhaustive number of hits
|
||||
// is requested and a distinct attribute is set.
|
||||
if exhaustive_number_hits {
|
||||
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
|
||||
let distinct_field = match distinct.as_deref() {
|
||||
Some(distinct) => Some(distinct),
|
||||
None => ctx.index.distinct_field(ctx.txn)?,
|
||||
};
|
||||
|
||||
if let Some(f) = distinct_field {
|
||||
if let Some(distinct_fid) = fields_ids_map.id(f) {
|
||||
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
|
||||
}
|
||||
|
@ -205,8 +205,18 @@ fn create_index() -> TempIndex {
|
||||
index
|
||||
}
|
||||
|
||||
fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
|
||||
let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids);
|
||||
fn verify_distinct(
|
||||
index: &Index,
|
||||
txn: &RoTxn,
|
||||
distinct: Option<&str>,
|
||||
docids: &[u32],
|
||||
) -> Vec<String> {
|
||||
let vs = collect_field_values(
|
||||
index,
|
||||
txn,
|
||||
distinct.or_else(|| index.distinct_field(txn).unwrap()).unwrap(),
|
||||
docids,
|
||||
);
|
||||
|
||||
let mut unique = HashSet::new();
|
||||
for v in vs.iter() {
|
||||
@ -223,12 +233,49 @@ fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
|
||||
fn test_distinct_placeholder_no_ranking_rules() {
|
||||
let index = create_index();
|
||||
|
||||
// Set the letter as filterable and unset the distinct attribute.
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_filterable_fields(hashset! { S("letter") });
|
||||
s.reset_distinct_field();
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.distinct(S("letter"));
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
|
||||
let distinct_values = verify_distinct(&index, &txn, Some("letter"), &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"A\"",
|
||||
"\"B\"",
|
||||
"\"C\"",
|
||||
"\"D\"",
|
||||
"\"E\"",
|
||||
"\"F\"",
|
||||
"\"G\"",
|
||||
"\"H\"",
|
||||
"\"I\"",
|
||||
"__does_not_exist__",
|
||||
"__does_not_exist__",
|
||||
"__does_not_exist__",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_distinct_at_search_placeholder_no_ranking_rules() {
|
||||
let index = create_index();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let s = Search::new(&txn, &index);
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"A\"",
|
||||
@ -263,7 +310,7 @@ fn test_distinct_placeholder_sort() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"E\"",
|
||||
@ -303,7 +350,7 @@ fn test_distinct_placeholder_sort() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"I\"",
|
||||
@ -346,7 +393,7 @@ fn test_distinct_placeholder_sort() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"I\"",
|
||||
@ -399,7 +446,7 @@ fn test_distinct_words() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"A\"",
|
||||
@ -453,7 +500,7 @@ fn test_distinct_sort_words() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"I\"",
|
||||
@ -549,7 +596,7 @@ fn test_distinct_typo() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]");
|
||||
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"B\"",
|
||||
|
Reference in New Issue
Block a user