5369: exhaustive facet search r=ManyTheFish a=ManyTheFish

Fixes #5403

This PR adds an `exhaustiveFacetCount` field to the `/facet-search` API, allowing end users to get an exact facet count when a distinct attribute is set in the index settings.

 # Usage

`POST /index/:index_uid/facet-search`
**Body:**
```json
{
  "facetQuery": "blob",
  "facetName": "genres",
  "q": "",
  "exhaustiveFacetCount": true
}
```

# Prototype Docker images

```sh
$ docker pull getmeili/meilisearch:prototype-exhaustive-facet-search-00
```

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot]
2025-03-13 10:36:04 +00:00
committed by GitHub
3 changed files with 351 additions and 2 deletions

View File

@@ -282,6 +282,7 @@ InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ;
InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ;
InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ; InvalidSearchSemanticRatio , InvalidRequest , BAD_REQUEST ;
InvalidSearchLocales , InvalidRequest , BAD_REQUEST ; InvalidSearchLocales , InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchExhaustiveFacetCount, InvalidRequest , BAD_REQUEST ;
InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ; InvalidFacetSearchFacetName , InvalidRequest , BAD_REQUEST ;
InvalidSimilarId , InvalidRequest , BAD_REQUEST ; InvalidSimilarId , InvalidRequest , BAD_REQUEST ;
InvalidSearchFilter , InvalidRequest , BAD_REQUEST ; InvalidSearchFilter , InvalidRequest , BAD_REQUEST ;

View File

@@ -68,6 +68,8 @@ pub struct FacetSearchQuery {
pub ranking_score_threshold: Option<RankingScoreThreshold>, pub ranking_score_threshold: Option<RankingScoreThreshold>,
#[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)] #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
pub locales: Option<Vec<Locale>>, pub locales: Option<Vec<Locale>>,
#[deserr(default, error = DeserrJsonError<InvalidFacetSearchExhaustiveFacetCount>, default)]
pub exhaustive_facet_count: Option<bool>,
} }
#[derive(Default)] #[derive(Default)]
@@ -98,6 +100,7 @@ impl FacetSearchAggregator {
hybrid, hybrid,
ranking_score_threshold, ranking_score_threshold,
locales, locales,
exhaustive_facet_count,
} = query; } = query;
Self { Self {
@@ -110,7 +113,8 @@ impl FacetSearchAggregator {
|| attributes_to_search_on.is_some() || attributes_to_search_on.is_some()
|| hybrid.is_some() || hybrid.is_some()
|| ranking_score_threshold.is_some() || ranking_score_threshold.is_some()
|| locales.is_some(), || locales.is_some()
|| exhaustive_facet_count.is_some(),
..Default::default() ..Default::default()
} }
} }
@@ -293,13 +297,24 @@ impl From<FacetSearchQuery> for SearchQuery {
hybrid, hybrid,
ranking_score_threshold, ranking_score_threshold,
locales, locales,
exhaustive_facet_count,
} = value; } = value;
// If exhaustive_facet_count is true, we need to set the page to 0
// because the facet search is not exhaustive by default.
let page = if exhaustive_facet_count.map_or(false, |exhaustive| exhaustive) {
// setting the page to 0 will force the search to be exhaustive when computing the number of hits,
// but it will skip the bucket sort saving time.
Some(0)
} else {
None
};
SearchQuery { SearchQuery {
q, q,
offset: DEFAULT_SEARCH_OFFSET(), offset: DEFAULT_SEARCH_OFFSET(),
limit: DEFAULT_SEARCH_LIMIT(), limit: DEFAULT_SEARCH_LIMIT(),
page: None, page,
hits_per_page: None, hits_per_page: None,
attributes_to_retrieve: None, attributes_to_retrieve: None,
retrieve_vectors: false, retrieve_vectors: false,

View File

@@ -615,3 +615,336 @@ async fn facet_search_with_filterable_attributes_rules_errors() {
}, },
).await; ).await;
} }
/// Checks how `exhaustiveFacetCount` changes facet-search counts when a distinct attribute is set.
///
/// The fixture holds 50 movies that all carry the `Blob` genre, spread over 23 different `color`
/// values. `color` is configured as the distinct attribute, then the `genres` facet is searched
/// for `blob` twice: once with default parameters and once with `exhaustiveFacetCount: true`.
/// The expected counts (27 and 23) are pinned by the inline snapshots below.
#[actix_rt::test]
async fn distinct_facet_search_on_movies() {
    let server = Server::new().await;
    let index = server.index("test");

    let documents = json!([
        {
            "id": 1,
            "title": "Carol",
            "genres": ["Romance", "Drama", "Blob"],
            "color": "crimson"
        },
        {
            "id": 2,
            "title": "Wonder Woman",
            "genres": ["Action", "Adventure", "Blob"],
            "color": "emerald"
        },
        {
            "id": 3,
            "title": "Life of Pi",
            "genres": ["Adventure", "Drama", "Blob"],
            "color": "azure"
        },
        {
            "id": 4,
            "title": "Mad Max: Fury Road",
            "genres": ["Adventure", "Science Fiction", "Blob"],
            "color": "scarlet"
        },
        {
            "id": 5,
            "title": "Moana",
            "genres": ["Fantasy", "Action", "Blob"],
            "color": "coral"
        },
        {
            "id": 6,
            "title": "Philadelphia",
            "genres": ["Drama", "Blob"],
            "color": "navy"
        },
        {
            "id": 7,
            "title": "The Matrix",
            "genres": ["Science Fiction", "Action", "Blob"],
            "color": "onyx"
        },
        {
            "id": 8,
            "title": "Inception",
            "genres": ["Science Fiction", "Thriller", "Blob"],
            "color": "cerulean"
        },
        {
            "id": 9,
            "title": "The Shawshank Redemption",
            "genres": ["Drama", "Blob"],
            "color": "slate"
        },
        {
            "id": 10,
            "title": "Pulp Fiction",
            "genres": ["Crime", "Drama", "Blob"],
            "color": "gold"
        },
        {
            "id": 11,
            "title": "The Dark Knight",
            "genres": ["Action", "Crime", "Blob"],
            "color": "obsidian"
        },
        {
            "id": 12,
            "title": "Forrest Gump",
            "genres": ["Drama", "Romance", "Blob"],
            "color": "jade"
        },
        {
            "id": 13,
            "title": "The Godfather",
            "genres": ["Crime", "Drama", "Blob"],
            "color": "sepia"
        },
        {
            "id": 14,
            "title": "Fight Club",
            "genres": ["Drama", "Thriller", "Blob"],
            "color": "ruby"
        },
        {
            "id": 15,
            "title": "Goodfellas",
            "genres": ["Crime", "Biography", "Blob"],
            "color": "charcoal"
        },
        {
            "id": 16,
            "title": "The Silence of the Lambs",
            "genres": ["Crime", "Thriller", "Blob"],
            "color": "amethyst"
        },
        {
            "id": 17,
            "title": "Schindler's List",
            "genres": ["Biography", "Drama", "Blob"],
            "color": "ebony"
        },
        {
            "id": 18,
            "title": "The Lord of the Rings",
            "genres": ["Adventure", "Fantasy", "Blob"],
            "color": "forest"
        },
        {
            "id": 19,
            "title": "Star Wars",
            "genres": ["Science Fiction", "Adventure", "Blob"],
            "color": "amber"
        },
        {
            "id": 20,
            "title": "Jurassic Park",
            "genres": ["Adventure", "Science Fiction", "Blob"],
            "color": "lime"
        },
        {
            "id": 21,
            "title": "Titanic",
            "genres": ["Drama", "Romance", "Blob"],
            "color": "sapphire"
        },
        {
            "id": 22,
            "title": "The Avengers",
            "genres": ["Action", "Science Fiction", "Blob"],
            "color": "burgundy"
        },
        {
            "id": 23,
            "title": "Avatar",
            "genres": ["Science Fiction", "Adventure", "Blob"],
            "color": "turquoise"
        },
        {
            "id": 24,
            "title": "The Green Mile",
            "genres": ["Crime", "Fantasy", "Blob"],
            "color": "emerald"
        },
        {
            "id": 25,
            "title": "Gladiator",
            "genres": ["Action", "Drama", "Blob"],
            "color": "sepia"
        },
        {
            "id": 26,
            "title": "The Departed",
            "genres": ["Crime", "Thriller", "Blob"],
            "color": "crimson"
        },
        {
            "id": 27,
            "title": "Saving Private Ryan",
            "genres": ["Drama", "War", "Blob"],
            "color": "slate"
        },
        {
            "id": 28,
            "title": "Interstellar",
            "genres": ["Science Fiction", "Adventure", "Blob"],
            "color": "azure"
        },
        {
            "id": 29,
            "title": "The Pianist",
            "genres": ["Biography", "Drama", "Blob"],
            "color": "onyx"
        },
        {
            "id": 30,
            "title": "The Usual Suspects",
            "genres": ["Crime", "Mystery", "Blob"],
            "color": "charcoal"
        },
        {
            "id": 31,
            "title": "The Sixth Sense",
            "genres": ["Mystery", "Thriller", "Blob"],
            "color": "amethyst"
        },
        {
            "id": 32,
            "title": "The Princess Bride",
            "genres": ["Adventure", "Romance", "Blob"],
            "color": "ruby"
        },
        {
            "id": 33,
            "title": "Blade Runner",
            "genres": ["Science Fiction", "Noir", "Blob"],
            "color": "sapphire"
        },
        {
            "id": 34,
            "title": "The Big Lebowski",
            "genres": ["Comedy", "Crime", "Blob"],
            "color": "gold"
        },
        {
            "id": 35,
            "title": "Good Will Hunting",
            "genres": ["Drama", "Romance", "Blob"],
            "color": "turquoise"
        },
        {
            "id": 36,
            "title": "The Terminator",
            "genres": ["Action", "Science Fiction", "Blob"],
            "color": "obsidian"
        },
        {
            "id": 37,
            "title": "Casablanca",
            "genres": ["Drama", "Romance", "Blob"],
            "color": "jade"
        },
        {
            "id": 38,
            "title": "The Exorcist",
            "genres": ["Horror", "Thriller", "Blob"],
            "color": "burgundy"
        },
        {
            "id": 39,
            "title": "Apocalypse Now",
            "genres": ["Drama", "War", "Blob"],
            "color": "forest"
        },
        {
            "id": 40,
            "title": "Back to the Future",
            "genres": ["Adventure", "Comedy", "Blob"],
            "color": "amber"
        },
        {
            "id": 41,
            "title": "The Graduate",
            "genres": ["Comedy", "Drama", "Blob"],
            "color": "azure"
        },
        {
            "id": 42,
            "title": "Alien",
            "genres": ["Horror", "Science Fiction", "Blob"],
            "color": "obsidian"
        },
        {
            "id": 43,
            "title": "The Breakfast Club",
            "genres": ["Drama", "Comedy", "Blob"],
            "color": "coral"
        },
        {
            "id": 44,
            "title": "Die Hard",
            "genres": ["Action", "Thriller", "Blob"],
            "color": "scarlet"
        },
        {
            "id": 45,
            "title": "The Sound of Music",
            "genres": ["Drama", "Musical", "Blob"],
            "color": "emerald"
        },
        {
            "id": 46,
            "title": "Jaws",
            "genres": ["Horror", "Thriller", "Blob"],
            "color": "navy"
        },
        {
            "id": 47,
            "title": "Rocky",
            "genres": ["Drama", "Sport", "Blob"],
            "color": "burgundy"
        },
        {
            "id": 48,
            "title": "E.T. the Extra-Terrestrial",
            "genres": ["Adventure", "Science Fiction", "Blob"],
            "color": "amber"
        },
        {
            "id": 49,
            "title": "The Godfather Part II",
            "genres": ["Crime", "Drama", "Blob"],
            "color": "sepia"
        },
        {
            "id": 50,
            "title": "One Flew Over the Cuckoo's Nest",
            "genres": ["Drama", "Blob"],
            "color": "slate"
        }
    ]);

    // Both attributes must be filterable: `genres` is the facet being searched and `color` is
    // the attribute used for deduplication.
    let (response, code) =
        index.update_settings_filterable_attributes(json!(["genres", "color"])).await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;

    let (response, code) = index.update_settings_distinct_attribute(json!("color")).await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;

    // Check the enqueue status like the settings updates above, so a rejected upload fails here
    // instead of surfacing later as a puzzling snapshot mismatch.
    let (response, code) = index.add_documents(documents, None).await;
    assert_eq!(202, code, "{:?}", response);
    index.wait_task(response.uid()).await;

    // non-exhaustive facet count is counting 27 documents with the facet query "blob" but there are only 23 documents with a distinct color.
    let (response, code) =
        index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "" })).await;
    assert_eq!(code, 200, "{}", response);
    snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":27}]"###);

    // exhaustive facet count is counting 23 documents with the facet query "blob" which is the number of distinct colors.
    let (response, code) =
        index.facet_search(json!({"facetQuery": "blob", "facetName": "genres", "q": "", "exhaustiveFacetCount": true })).await;
    assert_eq!(code, 200, "{}", response);
    snapshot!(response["facetHits"], @r###"[{"value":"Blob","count":23}]"###);
}