Compare commits

...

5 Commits

Author SHA1 Message Date
ManyTheFish
4672920091 Fix tests 2022-07-18 18:12:22 +02:00
ManyTheFish
e933509f3d Integrate new version of milli 2022-07-18 17:20:21 +02:00
ManyTheFish
ad086a6771 Use a milli version that computes exhaustively the number of hits 2022-07-13 10:58:39 +02:00
ManyTheFish
19102fde99 Format all fields in camelCase 2022-07-12 17:11:59 +02:00
ManyTheFish
92030bc0af Update API to fit the proto needs 2022-07-11 17:40:49 +02:00
9 changed files with 112 additions and 45 deletions

29
Cargo.lock generated
View File

@@ -644,9 +644,9 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.5.0" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a26a3df4d9c9231eb1e757fe6b1c66c471e0c2cd5410265e7c3109a726663c4" checksum = "2ed19edcd98f5bf6572f48d6f5982d595cb8718e47c6f0066d942b280575ff02"
dependencies = [ dependencies = [
"character_converter", "character_converter",
"cow-utils", "cow-utils",
@@ -1124,7 +1124,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=ease-search-results-pagination#ff97a6017c9e409b46df68930b0d2774159b29ad"
dependencies = [ dependencies = [
"nom", "nom",
"nom_locate", "nom_locate",
@@ -1149,7 +1149,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=ease-search-results-pagination#ff97a6017c9e409b46df68930b0d2774159b29ad"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@@ -1662,7 +1662,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=ease-search-results-pagination#ff97a6017c9e409b46df68930b0d2774159b29ad"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@@ -2013,7 +2013,7 @@ dependencies = [
"sha2", "sha2",
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"uuid 1.1.2", "uuid",
] ]
[[package]] [[package]]
@@ -2082,7 +2082,7 @@ dependencies = [
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"urlencoding", "urlencoding",
"uuid 1.1.2", "uuid",
"vergen", "vergen",
"walkdir", "walkdir",
"yaup", "yaup",
@@ -2146,7 +2146,7 @@ dependencies = [
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"tokio", "tokio",
"uuid 1.1.2", "uuid",
"walkdir", "walkdir",
"whoami", "whoami",
] ]
@@ -2189,7 +2189,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=ease-search-results-pagination#ff97a6017c9e409b46df68930b0d2774159b29ad"
dependencies = [ dependencies = [
"bimap", "bimap",
"bincode", "bincode",
@@ -2228,7 +2228,7 @@ dependencies = [
"tempfile", "tempfile",
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"uuid 0.8.2", "uuid",
] ]
[[package]] [[package]]
@@ -3670,15 +3670,6 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
[[package]]
name = "uuid"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.1.2" version = "1.1.2"

View File

@@ -7,7 +7,7 @@ edition = "2021"
enum-iterator = "0.7.0" enum-iterator = "0.7.0"
hmac = "0.12.1" hmac = "0.12.1"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.31.1" } milli = { git = "https://github.com/meilisearch/milli.git", branch = "ease-search-results-pagination" }
rand = "0.8.4" rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] } serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] } serde_json = { version = "1.0.79", features = ["preserve_order"] }

View File

@@ -10,7 +10,7 @@ use http::header::CONTENT_TYPE;
use meilisearch_auth::SearchRules; use meilisearch_auth::SearchRules;
use meilisearch_lib::index::{ use meilisearch_lib::index::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
}; };
use meilisearch_lib::index_controller::Stats; use meilisearch_lib::index_controller::Stats;
use meilisearch_lib::MeiliSearch; use meilisearch_lib::MeiliSearch;
@@ -369,6 +369,7 @@ pub struct SearchAggregator {
// pagination // pagination
max_limit: usize, max_limit: usize,
max_offset: usize, max_offset: usize,
finite_pagination: bool,
// formatting // formatting
highlight_pre_tag: bool, highlight_pre_tag: bool,
@@ -423,8 +424,15 @@ impl SearchAggregator {
ret.max_terms_number = q.split_whitespace().count(); ret.max_terms_number = q.split_whitespace().count();
} }
ret.max_limit = query.limit; if query.limit.is_none() && query.offset.is_none() {
ret.max_limit = query.hits_per_page;
ret.max_offset = query.page.saturating_sub(1) * query.hits_per_page;
ret.finite_pagination = true;
} else {
ret.max_limit = query.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT);
ret.max_offset = query.offset.unwrap_or_default(); ret.max_offset = query.offset.unwrap_or_default();
ret.finite_pagination = false;
}
ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); ret.highlight_pre_tag = query.highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); ret.highlight_post_tag = query.highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();

View File

@@ -3,7 +3,7 @@ use log::debug;
use meilisearch_auth::IndexSearchRules; use meilisearch_auth::IndexSearchRules;
use meilisearch_lib::index::{ use meilisearch_lib::index::{
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_HIT_PER_PAGE, DEFAULT_PAGE,
}; };
use meilisearch_lib::MeiliSearch; use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
@@ -29,6 +29,10 @@ pub struct SearchQueryGet {
q: Option<String>, q: Option<String>,
offset: Option<usize>, offset: Option<usize>,
limit: Option<usize>, limit: Option<usize>,
#[serde(default = "DEFAULT_PAGE")]
page: usize,
#[serde(default = "DEFAULT_HIT_PER_PAGE")]
hits_per_page: usize,
attributes_to_retrieve: Option<CS<String>>, attributes_to_retrieve: Option<CS<String>>,
attributes_to_crop: Option<CS<String>>, attributes_to_crop: Option<CS<String>>,
#[serde(default = "DEFAULT_CROP_LENGTH")] #[serde(default = "DEFAULT_CROP_LENGTH")]
@@ -60,7 +64,9 @@ impl From<SearchQueryGet> for SearchQuery {
Self { Self {
q: other.q, q: other.q,
offset: other.offset, offset: other.offset,
limit: other.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT), limit: other.limit,
page: other.page,
hits_per_page: other.hits_per_page,
attributes_to_retrieve: other attributes_to_retrieve: other
.attributes_to_retrieve .attributes_to_retrieve
.map(|o| o.into_iter().collect()), .map(|o| o.into_iter().collect()),

View File

@@ -28,7 +28,7 @@ lazy_static = "1.4.0"
log = "0.4.14" log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.31.1" } milli = { git = "https://github.com/meilisearch/milli.git", branch = "ease-search-results-pagination" }
mime = "0.3.16" mime = "0.3.16"
num_cpus = "1.13.1" num_cpus = "1.13.1"
obkv = "0.2.0" obkv = "0.2.0"

View File

@@ -25,6 +25,8 @@ impl ErrorCode for MilliError<'_> {
// TODO: wait for spec for new error codes. // TODO: wait for spec for new error codes.
UserError::SerdeJson(_) UserError::SerdeJson(_)
| UserError::DocumentLimitReached | UserError::DocumentLimitReached
// TODO: REMOVE ME
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal, | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStore, UserError::InvalidStoreFile => Code::InvalidStore,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,

View File

@@ -1,6 +1,7 @@
pub use search::{ pub use search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, HitsInfo, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_HIT_PER_PAGE, DEFAULT_PAGE,
DEFAULT_SEARCH_LIMIT,
}; };
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked}; pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};

View File

@@ -1,4 +1,4 @@
use std::cmp::min; use std::cmp::{max, min};
use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr; use std::str::FromStr;
use std::time::Instant; use std::time::Instant;
@@ -26,6 +26,8 @@ pub const DEFAULT_CROP_LENGTH: fn() -> usize = || 10;
pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string(); pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string(); pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string(); pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
pub const DEFAULT_PAGE: fn() -> usize = || 1;
pub const DEFAULT_HIT_PER_PAGE: fn() -> usize = || 20;
/// The maximum number of results that the engine /// will be able to return in one search call.
/// will be able to return in one search call. /// will be able to return in one search call.
@@ -36,8 +38,11 @@ pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
pub struct SearchQuery { pub struct SearchQuery {
pub q: Option<String>, pub q: Option<String>,
pub offset: Option<usize>, pub offset: Option<usize>,
#[serde(default = "DEFAULT_SEARCH_LIMIT")] pub limit: Option<usize>,
pub limit: usize, #[serde(default = "DEFAULT_PAGE")]
pub page: usize,
#[serde(default = "DEFAULT_HIT_PER_PAGE")]
pub hits_per_page: usize,
pub attributes_to_retrieve: Option<BTreeSet<String>>, pub attributes_to_retrieve: Option<BTreeSet<String>>,
pub attributes_to_crop: Option<Vec<String>>, pub attributes_to_crop: Option<Vec<String>>,
#[serde(default = "DEFAULT_CROP_LENGTH")] #[serde(default = "DEFAULT_CROP_LENGTH")]
@@ -71,15 +76,31 @@ pub struct SearchHit {
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct SearchResult { pub struct SearchResult {
pub hits: Vec<SearchHit>, pub hits: Vec<SearchHit>,
pub estimated_total_hits: u64,
pub query: String, pub query: String,
pub limit: usize,
pub offset: usize,
pub processing_time_ms: u128, pub processing_time_ms: u128,
#[serde(flatten)]
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>, pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
} }
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(untagged)]
pub enum HitsInfo {
#[serde(rename_all = "camelCase")]
Pagination {
hits_per_page: usize,
page: usize,
total_pages: usize,
},
#[serde(rename_all = "camelCase")]
OffsetLimit {
limit: usize,
offset: usize,
estimated_total_hits: usize,
},
}
impl Index { impl Index {
pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> { pub fn perform_search(&self, query: SearchQuery) -> Result<SearchResult> {
let before_search = Instant::now(); let before_search = Instant::now();
@@ -97,8 +118,28 @@ impl Index {
// Make sure that a user can't get more documents than the hard limit, // Make sure that a user can't get more documents than the hard limit,
// we align that on the offset too. // we align that on the offset too.
let is_finite_pagination = query.offset.is_none() && query.limit.is_none();
search.exhaustive_number_hits(is_finite_pagination);
let (offset, limit) = if is_finite_pagination {
// we start at least at page 1.
let page = max(query.page, 1);
// return at least 1 document.
let hits_per_page = max(query.hits_per_page, 1);
let offset = min(hits_per_page * (page - 1), max_total_hits);
let limit = min(hits_per_page, max_total_hits.saturating_sub(offset));
(offset, limit)
} else {
let offset = min(query.offset.unwrap_or(0), max_total_hits); let offset = min(query.offset.unwrap_or(0), max_total_hits);
let limit = min(query.limit, max_total_hits.saturating_sub(offset)); let limit = min(
query.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT),
max_total_hits.saturating_sub(offset),
);
(offset, limit)
};
search.offset(offset); search.offset(offset);
search.limit(limit); search.limit(limit);
@@ -223,7 +264,23 @@ impl Index {
documents.push(hit); documents.push(hit);
} }
let estimated_total_hits = candidates.len(); let number_of_hits = min(candidates.len() as usize, max_total_hits);
let hits_info = if is_finite_pagination {
// return at least 1 document.
let hits_per_page = max(query.hits_per_page, 1);
HitsInfo::Pagination {
hits_per_page,
page: offset / hits_per_page + 1,
// TODO @many: estimation for now but we should ask milli to return an exact value
total_pages: (number_of_hits + hits_per_page - 1) / query.hits_per_page,
}
} else {
HitsInfo::OffsetLimit {
limit: query.limit.unwrap_or_else(DEFAULT_SEARCH_LIMIT),
offset,
estimated_total_hits: number_of_hits,
}
};
let facet_distribution = match query.facets { let facet_distribution = match query.facets {
Some(ref fields) => { Some(ref fields) => {
@@ -246,10 +303,8 @@ impl Index {
let result = SearchResult { let result = SearchResult {
hits: documents, hits: documents,
estimated_total_hits, hits_info,
query: query.q.clone().unwrap_or_default(), query: query.q.clone().unwrap_or_default(),
limit: query.limit,
offset: query.offset.unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(), processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution, facet_distribution,
}; };

View File

@@ -652,7 +652,7 @@ mod test {
use nelson::Mocker; use nelson::Mocker;
use crate::index::error::Result as IndexResult; use crate::index::error::Result as IndexResult;
use crate::index::Index; use crate::index::{HitsInfo, Index};
use crate::index::{ use crate::index::{
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
}; };
@@ -685,7 +685,9 @@ mod test {
let query = SearchQuery { let query = SearchQuery {
q: Some(String::from("hello world")), q: Some(String::from("hello world")),
offset: Some(10), offset: Some(10),
limit: 0, limit: Some(0),
page: 1,
hits_per_page: 10,
attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()), attributes_to_retrieve: Some(vec!["string".to_owned()].into_iter().collect()),
attributes_to_crop: None, attributes_to_crop: None,
crop_length: 18, crop_length: 18,
@@ -701,10 +703,12 @@ mod test {
let result = SearchResult { let result = SearchResult {
hits: vec![], hits: vec![],
estimated_total_hits: 29,
query: "hello world".to_string(), query: "hello world".to_string(),
hits_info: HitsInfo::OffsetLimit {
limit: 24, limit: 24,
offset: 0, offset: 0,
estimated_total_hits: 29,
},
processing_time_ms: 50, processing_time_ms: 50,
facet_distribution: None, facet_distribution: None,
}; };