Compare commits

...

3 Commits

9 changed files with 42 additions and 28 deletions

29
Cargo.lock generated
View File

@ -644,9 +644,9 @@ dependencies = [
[[package]]
name = "charabia"
version = "0.5.0"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a26a3df4d9c9231eb1e757fe6b1c66c471e0c2cd5410265e7c3109a726663c4"
checksum = "2ed19edcd98f5bf6572f48d6f5982d595cb8718e47c6f0066d942b280575ff02"
dependencies = [
"character_converter",
"cow-utils",
@ -1124,7 +1124,7 @@ dependencies = [
[[package]]
name = "filter-parser"
version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8"
source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [
"nom",
"nom_locate",
@ -1149,7 +1149,7 @@ dependencies = [
[[package]]
name = "flatten-serde-json"
version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8"
source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [
"serde_json",
]
@ -1662,7 +1662,7 @@ dependencies = [
[[package]]
name = "json-depth-checker"
version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8"
source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [
"serde_json",
]
@ -2013,7 +2013,7 @@ dependencies = [
"sha2",
"thiserror",
"time 0.3.9",
"uuid 1.1.2",
"uuid",
]
[[package]]
@ -2082,7 +2082,7 @@ dependencies = [
"tokio",
"tokio-stream",
"urlencoding",
"uuid 1.1.2",
"uuid",
"vergen",
"walkdir",
"yaup",
@ -2146,7 +2146,7 @@ dependencies = [
"thiserror",
"time 0.3.9",
"tokio",
"uuid 1.1.2",
"uuid",
"walkdir",
"whoami",
]
@ -2189,7 +2189,7 @@ dependencies = [
[[package]]
name = "milli"
version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8"
source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [
"bimap",
"bincode",
@ -2228,7 +2228,7 @@ dependencies = [
"tempfile",
"thiserror",
"time 0.3.9",
"uuid 0.8.2",
"uuid",
]
[[package]]
@ -3670,15 +3670,6 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
[[package]]
name = "uuid"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"getrandom",
]
[[package]]
name = "uuid"
version = "1.1.2"

View File

@ -7,7 +7,7 @@ edition = "2021"
enum-iterator = "0.7.0"
hmac = "0.12.1"
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.31.1" }
milli = { git = "https://github.com/meilisearch/milli.git", branch = "matching-query-terms-policy" }
rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }

View File

@ -270,8 +270,8 @@ impl Segment {
}
async fn run(mut self, meilisearch: MeiliSearch) {
const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
// The first batch must be sent after one hour.
const INTERVAL: Duration = Duration::from_secs(60); // one minute
// The first batch must be sent after one minute.
let mut interval =
tokio::time::interval_at(tokio::time::Instant::now() + INTERVAL, INTERVAL);
@ -301,7 +301,7 @@ impl Segment {
.push(Identify {
context: Some(json!({
"app": {
"version": env!("CARGO_PKG_VERSION").to_string(),
"version": "prototype-optional-words-2".to_string(),
},
})),
user: self.user.clone(),
@ -366,6 +366,9 @@ pub struct SearchAggregator {
// The maximum number of terms in a q request
max_terms_number: usize,
// every time a search is done, we increment the counter linked to the `optional_words` setting it used
optional_words: HashMap<String, usize>,
// pagination
max_limit: usize,
max_offset: usize,
@ -423,6 +426,9 @@ impl SearchAggregator {
ret.max_terms_number = q.split_whitespace().count();
}
ret.optional_words
.insert(format!("{:?}", query.optional_words), 1);
ret.max_limit = query.limit;
ret.max_offset = query.offset.unwrap_or_default();
@ -476,6 +482,11 @@ impl SearchAggregator {
}
// q
self.max_terms_number = self.max_terms_number.max(other.max_terms_number);
for (key, value) in other.optional_words.into_iter() {
let optional_words = self.optional_words.entry(key).or_insert(0);
*optional_words = optional_words.saturating_add(value);
}
// pagination
self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset);
@ -517,6 +528,7 @@ impl SearchAggregator {
},
"q": {
"max_terms_number": self.max_terms_number,
"most_used_optional_words": self.optional_words.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
},
"pagination": {
"max_limit": self.max_limit,

View File

@ -2,8 +2,8 @@ use actix_web::{web, HttpRequest, HttpResponse};
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_lib::index::{
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
OptionalWords, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError;
@ -45,6 +45,8 @@ pub struct SearchQueryGet {
highlight_post_tag: String,
#[serde(default = "DEFAULT_CROP_MARKER")]
crop_marker: String,
#[serde(default)]
optional_words: OptionalWords,
}
impl From<SearchQueryGet> for SearchQuery {
@ -76,6 +78,7 @@ impl From<SearchQueryGet> for SearchQuery {
highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker,
optional_words: other.optional_words,
}
}
}

View File

@ -28,7 +28,7 @@ lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.31.1" }
milli = { git = "https://github.com/meilisearch/milli.git", branch = "matching-query-terms-policy" }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"

View File

@ -25,6 +25,7 @@ impl ErrorCode for MilliError<'_> {
// TODO: wait for spec for new error codes.
UserError::SerdeJson(_)
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStore,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,

View File

@ -14,6 +14,8 @@ mod index;
pub use index::{Document, IndexMeta, IndexStats};
pub use milli::OptionalWords;
#[cfg(not(test))]
pub use index::Index;

View File

@ -6,8 +6,8 @@ use std::time::Instant;
use either::Either;
use milli::tokenizer::TokenizerBuilder;
use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError,
DEFAULT_VALUES_PER_FACET,
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder,
OptionalWords, SortError, DEFAULT_VALUES_PER_FACET,
};
use regex::Regex;
use serde::{Deserialize, Serialize};
@ -55,6 +55,8 @@ pub struct SearchQuery {
pub highlight_post_tag: String,
#[serde(default = "DEFAULT_CROP_MARKER")]
pub crop_marker: String,
#[serde(default)]
pub optional_words: OptionalWords,
}
#[derive(Debug, Clone, Serialize, PartialEq)]
@ -91,6 +93,8 @@ impl Index {
search.query(query);
}
search.optional_words(query.optional_words);
let max_total_hits = self
.pagination_max_total_hits(&rtxn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

View File

@ -697,6 +697,7 @@ mod test {
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(),
optional_words: Default::default(),
};
let result = SearchResult {