Make the distinct attribute work at search time

This commit is contained in:
Clément Renault
2024-06-11 11:39:35 -04:00
parent cb765ad249
commit 0d31be1494
10 changed files with 77 additions and 2 deletions

View File

@ -159,6 +159,7 @@ impl<'a> Search<'a> {
offset: 0,
limit: self.limit + self.offset,
sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy,
terms_matching_strategy: self.terms_matching_strategy,

View File

@ -40,6 +40,7 @@ pub struct Search<'a> {
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy,
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
offset: 0,
limit: 20,
sort_criteria: None,
distinct: None,
searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
@ -105,6 +107,11 @@ impl<'a> Search<'a> {
self
}
/// Sets the attribute on which results of this search are made distinct.
///
/// When set, this value is forwarded to the search execution and is used
/// in place of the distinct field stored in the index settings; when left
/// unset, the index setting is used instead.
///
/// NOTE(review): the search currently panics if this attribute is not one
/// of the index's filterable fields — callers should validate beforehand
/// until a proper error is returned.
///
/// Returns `self` so that calls can be chained builder-style.
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
    let attribute = Some(distinct);
    self.distinct = attribute;
    self
}
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
self.searchable_attributes = Some(searchable);
self
@ -169,6 +176,13 @@ impl<'a> Search<'a> {
ctx.attributes_to_search_on(searchable_attributes)?;
}
if let Some(distinct) = &self.distinct {
if !ctx.index.filterable_fields(ctx.txn)?.contains(distinct) {
// TODO return a real error message
panic!("Distinct search field is not a filterable attribute");
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let PartialSearchResult {
located_query_terms,
@ -185,6 +199,7 @@ impl<'a> Search<'a> {
self.scoring_strategy,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.offset,
self.limit,
@ -202,6 +217,7 @@ impl<'a> Search<'a> {
self.exhaustive_number_hits,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.offset,
self.limit,
@ -238,6 +254,7 @@ impl fmt::Debug for Search<'_> {
offset,
limit,
sort_criteria,
distinct,
searchable_attributes,
geo_strategy: _,
terms_matching_strategy,
@ -257,6 +274,7 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("distinct", distinct)
.field("searchable_attributes", searchable_attributes)
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)

View File

@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
query: &Q,
distinct: Option<&str>,
universe: &RoaringBitmap,
from: usize,
length: usize,
@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe);
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? {
let distinct_field = match distinct {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
let distinct_fid = if let Some(field) = distinct_field {
ctx.index.fields_ids_map(ctx.txn)?.id(field)
} else {
None

View File

@ -516,6 +516,7 @@ mod tests {
false,
universe,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,

View File

@ -567,6 +567,7 @@ pub fn execute_vector_search(
scoring_strategy: ScoringStrategy,
universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
@ -597,6 +598,7 @@ pub fn execute_vector_search(
ctx,
ranking_rules,
&PlaceholderQuery,
distinct.as_deref(),
&universe,
from,
length,
@ -626,6 +628,7 @@ pub fn execute_search(
exhaustive_number_hits: bool,
mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
@ -716,6 +719,7 @@ pub fn execute_search(
ctx,
ranking_rules,
&graph,
distinct.as_deref(),
&universe,
from,
length,
@ -731,6 +735,7 @@ pub fn execute_search(
ctx,
ranking_rules,
&PlaceholderQuery,
distinct.as_deref(),
&universe,
from,
length,
@ -747,7 +752,14 @@ pub fn execute_search(
// The candidates are the whole universe, unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
// TODO Should the distinct search parameter replace the distinct setting?
// Or should we return an error if the distinct search param is set at the same time as the setting is set?
let distinct_field = match distinct.as_deref() {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
if let Some(f) = distinct_field {
if let Some(distinct_fid) = fields_ids_map.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
}