2494: Introduce the new faceting and pagination settings r=ManyTheFish a=Kerollmops

This PR introduces two new settings following the newly created spec https://github.com/meilisearch/specifications/pull/157:
 - The `faceting.max_values_per_facet` one describes the maximum number of values (each with a count) associated with a value in a facet distribution query.
 - The `pagination.limited_to` one describes the maximum number of documents that a search query can ever return.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
bors[bot]
2022-06-09 12:09:21 +00:00
committed by GitHub
10 changed files with 285 additions and 25 deletions

16
Cargo.lock generated
View File

@@ -1123,8 +1123,8 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "0.29.1" version = "0.29.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.1#7313d6c5331e7dc13e9ded70b60b1f56dd7e583c" source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.2#19d44142a170d63d076e7d327b542dfa1f3f8b96"
dependencies = [ dependencies = [
"nom", "nom",
"nom_locate", "nom_locate",
@@ -1148,8 +1148,8 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "0.29.1" version = "0.29.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.1#7313d6c5331e7dc13e9ded70b60b1f56dd7e583c" source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.2#19d44142a170d63d076e7d327b542dfa1f3f8b96"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@@ -1661,8 +1661,8 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "0.29.1" version = "0.29.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.1#7313d6c5331e7dc13e9ded70b60b1f56dd7e583c" source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.2#19d44142a170d63d076e7d327b542dfa1f3f8b96"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@@ -2189,8 +2189,8 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.29.1" version = "0.29.2"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.1#7313d6c5331e7dc13e9ded70b60b1f56dd7e583c" source = "git+https://github.com/meilisearch/milli.git?tag=v0.29.2#19d44142a170d63d076e7d327b542dfa1f3f8b96"
dependencies = [ dependencies = [
"bimap", "bimap",
"bincode", "bincode",

View File

@@ -8,7 +8,7 @@ base64 = "0.13.0"
enum-iterator = "0.7.0" enum-iterator = "0.7.0"
hmac = "0.12.1" hmac = "0.12.1"
meilisearch-error = { path = "../meilisearch-error" } meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.29.1" } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.29.2" }
rand = "0.8.4" rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] } serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] } serde_json = { version = "1.0.79", features = ["preserve_order"] }

View File

@@ -282,6 +282,50 @@ make_setting_route!(
} }
); );
make_setting_route!(
"/faceting",
patch,
meilisearch_lib::index::updates::FacetingSettings,
faceting,
"faceting",
analytics,
|setting: &Option<meilisearch_lib::index::updates::FacetingSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Faceting Updated".to_string(),
json!({
"faceting": {
"max_values_per_facet": setting.as_ref().and_then(|s| s.max_values_per_facet.set()),
},
}),
Some(req),
);
}
);
make_setting_route!(
"/pagination",
patch,
meilisearch_lib::index::updates::PaginationSettings,
pagination,
"pagination",
analytics,
|setting: &Option<meilisearch_lib::index::updates::PaginationSettings>, req: &HttpRequest| {
use serde_json::json;
analytics.publish(
"Pagination Updated".to_string(),
json!({
"pagination": {
"limited_to": setting.as_ref().and_then(|s| s.limited_to.set()),
},
}),
Some(req),
);
}
);
macro_rules! generate_configure { macro_rules! generate_configure {
($($mod:ident),*) => { ($($mod:ident),*) => {
pub fn configure(cfg: &mut web::ServiceConfig) { pub fn configure(cfg: &mut web::ServiceConfig) {

View File

@@ -61,7 +61,7 @@ async fn import_dump_v2_movie_raw() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }}) json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -125,7 +125,7 @@ async fn import_dump_v2_movie_with_settings() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }}) json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -189,7 +189,7 @@ async fn import_dump_v2_rubygems_with_settings() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }}) json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 }})
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -253,7 +253,7 @@ async fn import_dump_v3_movie_raw() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }}) json!({"displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -317,7 +317,7 @@ async fn import_dump_v3_movie_with_settings() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }}) json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -381,7 +381,7 @@ async fn import_dump_v3_rubygems_with_settings() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }}) json!({"displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -445,7 +445,7 @@ async fn import_dump_v4_movie_raw() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }}) json!({ "displayedAttributes": ["*"], "searchableAttributes": ["*"], "filterableAttributes": [], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -509,7 +509,7 @@ async fn import_dump_v4_movie_with_settings() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }}) json!({ "displayedAttributes": ["title", "genres", "overview", "poster", "release_date"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["words", "typo", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;
@@ -573,7 +573,7 @@ async fn import_dump_v4_rubygems_with_settings() {
assert_eq!(code, 200); assert_eq!(code, 200);
assert_eq!( assert_eq!(
settings, settings,
json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }}) json!({ "displayedAttributes": ["name", "summary", "description", "version", "total_downloads"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "limitedTo": 1000 } })
); );
let (tasks, code) = index.list_tasks().await; let (tasks, code) = index.list_tasks().await;

View File

@@ -565,6 +565,36 @@ async fn placeholder_search_is_hard_limited() {
}, },
) )
.await; .await;
index
.update_settings(json!({ "pagination": { "limitedTo": 10_000 } }))
.await;
index.wait_task(1).await;
index
.search(
json!({
"limit": 1500,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1200);
},
)
.await;
index
.search(
json!({
"offset": 1000,
"limit": 400,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 200);
},
)
.await;
} }
#[actix_rt::test] #[actix_rt::test]
@@ -604,4 +634,85 @@ async fn search_is_hard_limited() {
}, },
) )
.await; .await;
index
.update_settings(json!({ "pagination": { "limitedTo": 10_000 } }))
.await;
index.wait_task(1).await;
index
.search(
json!({
"q": "unique",
"limit": 1500,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1200);
},
)
.await;
index
.search(
json!({
"q": "unique",
"offset": 1000,
"limit": 400,
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 200);
},
)
.await;
}
#[actix_rt::test]
async fn faceting_max_values_per_facet() {
let server = Server::new().await;
let index = server.index("test");
index
.update_settings(json!({ "filterableAttributes": ["number"] }))
.await;
let documents: Vec<_> = (0..10_000)
.map(|id| json!({ "id": id, "number": id * 10 }))
.collect();
index.add_documents(json!(documents), None).await;
index.wait_task(1).await;
index
.search(
json!({
"facets": ["number"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let numbers = response["facetDistribution"]["number"].as_object().unwrap();
assert_eq!(numbers.len(), 100);
},
)
.await;
index
.update_settings(json!({ "faceting": { "maxValuesPerFacet": 10_000 } }))
.await;
index.wait_task(2).await;
index
.search(
json!({
"facets": ["number"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
let numbers = dbg!(&response)["facetDistribution"]["number"]
.as_object()
.unwrap();
assert_eq!(numbers.len(), 10_000);
},
)
.await;
} }

View File

@@ -24,6 +24,12 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
); );
map.insert("stop_words", json!([])); map.insert("stop_words", json!([]));
map.insert("synonyms", json!({})); map.insert("synonyms", json!({}));
map.insert(
"faceting",
json!({
"maxValuesByFacet": json!(100),
}),
);
map map
}); });
@@ -43,7 +49,7 @@ async fn get_settings() {
let (response, code) = index.settings().await; let (response, code) = index.settings().await;
assert_eq!(code, 200); assert_eq!(code, 200);
let settings = response.as_object().unwrap(); let settings = response.as_object().unwrap();
assert_eq!(settings.keys().len(), 9); assert_eq!(settings.keys().len(), 11);
assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["displayedAttributes"], json!(["*"]));
assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"]));
assert_eq!(settings["filterableAttributes"], json!([])); assert_eq!(settings["filterableAttributes"], json!([]));
@@ -61,6 +67,18 @@ async fn get_settings() {
]) ])
); );
assert_eq!(settings["stopWords"], json!([])); assert_eq!(settings["stopWords"], json!([]));
assert_eq!(
settings["faceting"],
json!({
"maxValuesPerFacet": 100,
})
);
assert_eq!(
settings["pagination"],
json!({
"limitedTo": 1000,
})
);
} }
#[actix_rt::test] #[actix_rt::test]

View File

@@ -30,7 +30,7 @@ lazy_static = "1.4.0"
log = "0.4.14" log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-error = { path = "../meilisearch-error" } meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.29.1" } milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.29.2" }
mime = "0.3.16" mime = "0.3.16"
num_cpus = "1.13.1" num_cpus = "1.13.1"
obkv = "0.2.0" obkv = "0.2.0"

View File

@@ -8,17 +8,18 @@ use std::sync::Arc;
use fst::IntoStreamer; use fst::IntoStreamer;
use milli::heed::{EnvOpenOptions, RoTxn}; use milli::heed::{EnvOpenOptions, RoTxn};
use milli::update::{IndexerConfig, Setting}; use milli::update::{IndexerConfig, Setting};
use milli::{obkv_to_json, FieldDistribution}; use milli::{obkv_to_json, FieldDistribution, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use time::OffsetDateTime; use time::OffsetDateTime;
use uuid::Uuid; use uuid::Uuid;
use crate::index::search::DEFAULT_PAGINATION_LIMITED_TO;
use crate::EnvSizer; use crate::EnvSizer;
use super::error::IndexError; use super::error::IndexError;
use super::error::Result; use super::error::Result;
use super::updates::{MinWordSizeTyposSetting, TypoSettings}; use super::updates::{FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, TypoSettings};
use super::{Checked, Settings}; use super::{Checked, Settings};
pub type Document = Map<String, Value>; pub type Document = Map<String, Value>;
@@ -193,6 +194,20 @@ impl Index {
disable_on_attributes: Setting::Set(disabled_attributes), disable_on_attributes: Setting::Set(disabled_attributes),
}; };
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
self.max_values_per_facet(txn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET),
),
};
let pagination = PaginationSettings {
limited_to: Setting::Set(
self.pagination_limited_to(txn)?
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO),
),
};
Ok(Settings { Ok(Settings {
displayed_attributes: match displayed_attributes { displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs), Some(attrs) => Setting::Set(attrs),
@@ -212,6 +227,8 @@ impl Index {
}, },
synonyms: Setting::Set(synonyms), synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance), typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
_kind: PhantomData, _kind: PhantomData,
}) })
} }

View File

@@ -7,6 +7,7 @@ use either::Either;
use milli::tokenizer::TokenizerBuilder; use milli::tokenizer::TokenizerBuilder;
use milli::{ use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError, AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
DEFAULT_VALUES_PER_FACET,
}; };
use regex::Regex; use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -28,7 +29,7 @@ pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
/// The maximimum number of results that the engine /// The maximimum number of results that the engine
/// will be able to return in one search call. /// will be able to return in one search call.
pub const HARD_RESULT_LIMIT: usize = 1000; pub const DEFAULT_PAGINATION_LIMITED_TO: usize = 1000;
#[derive(Deserialize, Debug, Clone, PartialEq)] #[derive(Deserialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase", deny_unknown_fields)] #[serde(rename_all = "camelCase", deny_unknown_fields)]
@@ -90,10 +91,14 @@ impl Index {
search.query(query); search.query(query);
} }
let pagination_limited_to = self
.pagination_limited_to(&rtxn)?
.unwrap_or(DEFAULT_PAGINATION_LIMITED_TO);
// Make sure that a user can't get more documents than the hard limit, // Make sure that a user can't get more documents than the hard limit,
// we align that on the offset too. // we align that on the offset too.
let offset = min(query.offset.unwrap_or(0), HARD_RESULT_LIMIT); let offset = min(query.offset.unwrap_or(0), pagination_limited_to);
let limit = min(query.limit, HARD_RESULT_LIMIT.saturating_sub(offset)); let limit = min(query.limit, pagination_limited_to.saturating_sub(offset));
search.offset(offset); search.offset(offset);
search.limit(limit); search.limit(limit);
@@ -223,6 +228,12 @@ impl Index {
let facet_distribution = match query.facets { let facet_distribution = match query.facets {
Some(ref fields) => { Some(ref fields) => {
let mut facet_distribution = self.facets_distribution(&rtxn); let mut facet_distribution = self.facets_distribution(&rtxn);
let max_values_by_facet = self
.max_values_per_facet(&rtxn)?
.unwrap_or(DEFAULT_VALUES_PER_FACET);
facet_distribution.max_values_per_facet(max_values_by_facet);
if fields.iter().all(|f| f != "*") { if fields.iter().all(|f| f != "*") {
facet_distribution.facets(fields); facet_distribution.facets(fields);
} }

View File

@@ -68,6 +68,27 @@ pub struct TypoSettings {
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_attributes: Setting<BTreeSet<String>>, pub disable_on_attributes: Setting<BTreeSet<String>>,
} }
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct FacetingSettings {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub max_values_per_facet: Setting<usize>,
}
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct PaginationSettings {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub limited_to: Setting<usize>,
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`. /// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
@@ -114,6 +135,12 @@ pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub typo_tolerance: Setting<TypoSettings>, pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub faceting: Setting<FacetingSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub pagination: Setting<PaginationSettings>,
#[serde(skip)] #[serde(skip)]
pub _kind: PhantomData<T>, pub _kind: PhantomData<T>,
@@ -131,6 +158,8 @@ impl Settings<Checked> {
synonyms: Setting::Reset, synonyms: Setting::Reset,
distinct_attribute: Setting::Reset, distinct_attribute: Setting::Reset,
typo_tolerance: Setting::Reset, typo_tolerance: Setting::Reset,
faceting: Setting::Reset,
pagination: Setting::Reset,
_kind: PhantomData, _kind: PhantomData,
} }
} }
@@ -146,6 +175,8 @@ impl Settings<Checked> {
synonyms, synonyms,
distinct_attribute, distinct_attribute,
typo_tolerance, typo_tolerance,
faceting,
pagination,
.. ..
} = self; } = self;
@@ -159,6 +190,8 @@ impl Settings<Checked> {
synonyms, synonyms,
distinct_attribute, distinct_attribute,
typo_tolerance, typo_tolerance,
faceting,
pagination,
_kind: PhantomData, _kind: PhantomData,
} }
} }
@@ -198,6 +231,8 @@ impl Settings<Unchecked> {
synonyms: self.synonyms, synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute, distinct_attribute: self.distinct_attribute,
typo_tolerance: self.typo_tolerance, typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
_kind: PhantomData, _kind: PhantomData,
} }
} }
@@ -427,6 +462,26 @@ pub fn apply_settings_to_builder(
} }
Setting::NotSet => (), Setting::NotSet => (),
} }
match settings.faceting {
Setting::Set(ref value) => match value.max_values_per_facet {
Setting::Set(val) => builder.set_max_values_per_facet(val),
Setting::Reset => builder.reset_max_values_per_facet(),
Setting::NotSet => (),
},
Setting::Reset => builder.reset_max_values_per_facet(),
Setting::NotSet => (),
}
match settings.pagination {
Setting::Set(ref value) => match value.limited_to {
Setting::Set(val) => builder.set_pagination_limited_to(val),
Setting::Reset => builder.reset_pagination_limited_to(),
Setting::NotSet => (),
},
Setting::Reset => builder.reset_pagination_limited_to(),
Setting::NotSet => (),
}
} }
#[cfg(test)] #[cfg(test)]
@@ -456,6 +511,8 @@ pub(crate) mod test {
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet, distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet, typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
_kind: PhantomData::<Unchecked>, _kind: PhantomData::<Unchecked>,
}; };
@@ -478,6 +535,8 @@ pub(crate) mod test {
synonyms: Setting::NotSet, synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet, distinct_attribute: Setting::NotSet,
typo_tolerance: Setting::NotSet, typo_tolerance: Setting::NotSet,
faceting: Setting::NotSet,
pagination: Setting::NotSet,
_kind: PhantomData::<Unchecked>, _kind: PhantomData::<Unchecked>,
}; };