mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-28 01:01:00 +00:00
Merge #4693
4693: Introduce distinct attributes at search time r=irevoire a=Kerollmops This PR fixes #4611. ### To Do - [x] Remove the `distinguishableAttributes` settings (not even a commit about that). - [x] Use the `filterableAttributes` to be able to use the `distinct` parameter at search. - [x] Work on the errors and make tests. Co-authored-by: Clément Renault <clement@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
@ -134,6 +134,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
}
|
||||
)]
|
||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
|
||||
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
true => "This index does not have configured filterable attributes.".to_string(),
|
||||
false => format!("Available filterable attributes are: `{}{}`.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||
),
|
||||
}
|
||||
)]
|
||||
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
|
||||
#[error("Attribute `{}` is not facet-searchable. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
|
@ -159,6 +159,7 @@ impl<'a> Search<'a> {
|
||||
offset: 0,
|
||||
limit: self.limit + self.offset,
|
||||
sort_criteria: self.sort_criteria.clone(),
|
||||
distinct: self.distinct.clone(),
|
||||
searchable_attributes: self.searchable_attributes,
|
||||
geo_strategy: self.geo_strategy,
|
||||
terms_matching_strategy: self.terms_matching_strategy,
|
||||
|
@ -11,8 +11,8 @@ use self::new::{execute_vector_search, PartialSearchResult};
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::vector::Embedder;
|
||||
use crate::{
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result,
|
||||
SearchContext, TimeBudget,
|
||||
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
|
||||
Result, SearchContext, TimeBudget, UserError,
|
||||
};
|
||||
|
||||
// Building these factories is not free.
|
||||
@ -40,6 +40,7 @@ pub struct Search<'a> {
|
||||
offset: usize,
|
||||
limit: usize,
|
||||
sort_criteria: Option<Vec<AscDesc>>,
|
||||
distinct: Option<String>,
|
||||
searchable_attributes: Option<&'a [String]>,
|
||||
geo_strategy: new::GeoSortStrategy,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
|
||||
offset: 0,
|
||||
limit: 20,
|
||||
sort_criteria: None,
|
||||
distinct: None,
|
||||
searchable_attributes: None,
|
||||
geo_strategy: new::GeoSortStrategy::default(),
|
||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||
@ -105,6 +107,11 @@ impl<'a> Search<'a> {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
|
||||
self.distinct = Some(distinct);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
|
||||
self.searchable_attributes = Some(searchable);
|
||||
self
|
||||
@ -169,6 +176,19 @@ impl<'a> Search<'a> {
|
||||
ctx.attributes_to_search_on(searchable_attributes)?;
|
||||
}
|
||||
|
||||
if let Some(distinct) = &self.distinct {
|
||||
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
|
||||
if !crate::is_faceted(distinct, &filterable_fields) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
|
||||
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
|
||||
field: distinct.clone(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
|
||||
let PartialSearchResult {
|
||||
located_query_terms,
|
||||
@ -185,6 +205,7 @@ impl<'a> Search<'a> {
|
||||
self.scoring_strategy,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.offset,
|
||||
self.limit,
|
||||
@ -202,6 +223,7 @@ impl<'a> Search<'a> {
|
||||
self.exhaustive_number_hits,
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.offset,
|
||||
self.limit,
|
||||
@ -238,6 +260,7 @@ impl fmt::Debug for Search<'_> {
|
||||
offset,
|
||||
limit,
|
||||
sort_criteria,
|
||||
distinct,
|
||||
searchable_attributes,
|
||||
geo_strategy: _,
|
||||
terms_matching_strategy,
|
||||
@ -257,6 +280,7 @@ impl fmt::Debug for Search<'_> {
|
||||
.field("offset", offset)
|
||||
.field("limit", limit)
|
||||
.field("sort_criteria", sort_criteria)
|
||||
.field("distinct", distinct)
|
||||
.field("searchable_attributes", searchable_attributes)
|
||||
.field("terms_matching_strategy", terms_matching_strategy)
|
||||
.field("scoring_strategy", scoring_strategy)
|
||||
|
@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
|
||||
query: &Q,
|
||||
distinct: Option<&str>,
|
||||
universe: &RoaringBitmap,
|
||||
from: usize,
|
||||
length: usize,
|
||||
@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
logger.ranking_rules(&ranking_rules);
|
||||
logger.initial_universe(universe);
|
||||
|
||||
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? {
|
||||
let distinct_field = match distinct {
|
||||
Some(distinct) => Some(distinct),
|
||||
None => ctx.index.distinct_field(ctx.txn)?,
|
||||
};
|
||||
|
||||
let distinct_fid = if let Some(field) = distinct_field {
|
||||
ctx.index.fields_ids_map(ctx.txn)?.id(field)
|
||||
} else {
|
||||
None
|
||||
|
@ -516,6 +516,7 @@ mod tests {
|
||||
false,
|
||||
universe,
|
||||
&None,
|
||||
&None,
|
||||
crate::search::new::GeoSortStrategy::default(),
|
||||
0,
|
||||
100,
|
||||
|
@ -568,6 +568,7 @@ pub fn execute_vector_search(
|
||||
scoring_strategy: ScoringStrategy,
|
||||
universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
from: usize,
|
||||
length: usize,
|
||||
@ -598,6 +599,7 @@ pub fn execute_vector_search(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&PlaceholderQuery,
|
||||
distinct.as_deref(),
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
@ -627,6 +629,7 @@ pub fn execute_search(
|
||||
exhaustive_number_hits: bool,
|
||||
mut universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
from: usize,
|
||||
length: usize,
|
||||
@ -717,6 +720,7 @@ pub fn execute_search(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&graph,
|
||||
distinct.as_deref(),
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
@ -732,6 +736,7 @@ pub fn execute_search(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
&PlaceholderQuery,
|
||||
distinct.as_deref(),
|
||||
&universe,
|
||||
from,
|
||||
length,
|
||||
@ -748,7 +753,12 @@ pub fn execute_search(
|
||||
// The candidates is the universe unless the exhaustive number of hits
|
||||
// is requested and a distinct attribute is set.
|
||||
if exhaustive_number_hits {
|
||||
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
|
||||
let distinct_field = match distinct.as_deref() {
|
||||
Some(distinct) => Some(distinct),
|
||||
None => ctx.index.distinct_field(ctx.txn)?,
|
||||
};
|
||||
|
||||
if let Some(f) = distinct_field {
|
||||
if let Some(distinct_fid) = fields_ids_map.id(f) {
|
||||
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
|
||||
}
|
||||
|
@ -205,8 +205,18 @@ fn create_index() -> TempIndex {
|
||||
index
|
||||
}
|
||||
|
||||
fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
|
||||
let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids);
|
||||
fn verify_distinct(
|
||||
index: &Index,
|
||||
txn: &RoTxn,
|
||||
distinct: Option<&str>,
|
||||
docids: &[u32],
|
||||
) -> Vec<String> {
|
||||
let vs = collect_field_values(
|
||||
index,
|
||||
txn,
|
||||
distinct.or_else(|| index.distinct_field(txn).unwrap()).unwrap(),
|
||||
docids,
|
||||
);
|
||||
|
||||
let mut unique = HashSet::new();
|
||||
for v in vs.iter() {
|
||||
@ -223,12 +233,49 @@ fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
|
||||
fn test_distinct_placeholder_no_ranking_rules() {
|
||||
let index = create_index();
|
||||
|
||||
// Set the letter as filterable and unset the distinct attribute.
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_filterable_fields(hashset! { S("letter") });
|
||||
s.reset_distinct_field();
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.distinct(S("letter"));
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
|
||||
let distinct_values = verify_distinct(&index, &txn, Some("letter"), &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"A\"",
|
||||
"\"B\"",
|
||||
"\"C\"",
|
||||
"\"D\"",
|
||||
"\"E\"",
|
||||
"\"F\"",
|
||||
"\"G\"",
|
||||
"\"H\"",
|
||||
"\"I\"",
|
||||
"__does_not_exist__",
|
||||
"__does_not_exist__",
|
||||
"__does_not_exist__",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_distinct_at_search_placeholder_no_ranking_rules() {
|
||||
let index = create_index();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let s = Search::new(&txn, &index);
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"A\"",
|
||||
@ -263,7 +310,7 @@ fn test_distinct_placeholder_sort() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"E\"",
|
||||
@ -303,7 +350,7 @@ fn test_distinct_placeholder_sort() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"I\"",
|
||||
@ -346,7 +393,7 @@ fn test_distinct_placeholder_sort() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"I\"",
|
||||
@ -399,7 +446,7 @@ fn test_distinct_words() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"A\"",
|
||||
@ -453,7 +500,7 @@ fn test_distinct_sort_words() {
|
||||
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]");
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"I\"",
|
||||
@ -549,7 +596,7 @@ fn test_distinct_typo() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]");
|
||||
|
||||
let distinct_values = verify_distinct(&index, &txn, &documents_ids);
|
||||
let distinct_values = verify_distinct(&index, &txn, None, &documents_ids);
|
||||
insta::assert_debug_snapshot!(distinct_values, @r###"
|
||||
[
|
||||
"\"B\"",
|
||||
|
Reference in New Issue
Block a user