Compare commits

...

2 Commits

Author SHA1 Message Date
ManyTheFish
a9d908b2a2 Fix removing word threshold 2022-07-28 14:20:07 +02:00
ManyTheFish
659d65dddc Implement several behaviors for optional words 2022-07-25 15:52:50 +02:00
9 changed files with 39 additions and 25 deletions

29
Cargo.lock generated
View File

@@ -644,9 +644,9 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.5.0" version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a26a3df4d9c9231eb1e757fe6b1c66c471e0c2cd5410265e7c3109a726663c4" checksum = "2ed19edcd98f5bf6572f48d6f5982d595cb8718e47c6f0066d942b280575ff02"
dependencies = [ dependencies = [
"character_converter", "character_converter",
"cow-utils", "cow-utils",
@@ -1124,7 +1124,7 @@ dependencies = [
[[package]] [[package]]
name = "filter-parser" name = "filter-parser"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [ dependencies = [
"nom", "nom",
"nom_locate", "nom_locate",
@@ -1149,7 +1149,7 @@ dependencies = [
[[package]] [[package]]
name = "flatten-serde-json" name = "flatten-serde-json"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@@ -1662,7 +1662,7 @@ dependencies = [
[[package]] [[package]]
name = "json-depth-checker" name = "json-depth-checker"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [ dependencies = [
"serde_json", "serde_json",
] ]
@@ -2013,7 +2013,7 @@ dependencies = [
"sha2", "sha2",
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"uuid 1.1.2", "uuid",
] ]
[[package]] [[package]]
@@ -2082,7 +2082,7 @@ dependencies = [
"tokio", "tokio",
"tokio-stream", "tokio-stream",
"urlencoding", "urlencoding",
"uuid 1.1.2", "uuid",
"vergen", "vergen",
"walkdir", "walkdir",
"yaup", "yaup",
@@ -2146,7 +2146,7 @@ dependencies = [
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"tokio", "tokio",
"uuid 1.1.2", "uuid",
"walkdir", "walkdir",
"whoami", "whoami",
] ]
@@ -2189,7 +2189,7 @@ dependencies = [
[[package]] [[package]]
name = "milli" name = "milli"
version = "0.31.1" version = "0.31.1"
source = "git+https://github.com/meilisearch/milli.git?tag=v0.31.1#83ad1aaf0552db9f63fc21ae9fe3976e61577dc8" source = "git+https://github.com/meilisearch/milli.git?branch=matching-query-terms-policy#13b12997105254a727e7419ee2fb7ab28c3633e4"
dependencies = [ dependencies = [
"bimap", "bimap",
"bincode", "bincode",
@@ -2228,7 +2228,7 @@ dependencies = [
"tempfile", "tempfile",
"thiserror", "thiserror",
"time 0.3.9", "time 0.3.9",
"uuid 0.8.2", "uuid",
] ]
[[package]] [[package]]
@@ -3670,15 +3670,6 @@ version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1" checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
[[package]]
name = "uuid"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7"
dependencies = [
"getrandom",
]
[[package]] [[package]]
name = "uuid" name = "uuid"
version = "1.1.2" version = "1.1.2"

View File

@@ -7,7 +7,7 @@ edition = "2021"
enum-iterator = "0.7.0" enum-iterator = "0.7.0"
hmac = "0.12.1" hmac = "0.12.1"
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.31.1" } milli = { git = "https://github.com/meilisearch/milli.git", branch = "matching-query-terms-policy" }
rand = "0.8.4" rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] } serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] } serde_json = { version = "1.0.79", features = ["preserve_order"] }

View File

@@ -366,6 +366,9 @@ pub struct SearchAggregator {
// The maximum number of terms in a q request // The maximum number of terms in a q request
max_terms_number: usize, max_terms_number: usize,
// every time a search is done, we increment the counter linked to the used settings
optional_words: HashMap<String, usize>,
// pagination // pagination
max_limit: usize, max_limit: usize,
max_offset: usize, max_offset: usize,
@@ -423,6 +426,9 @@ impl SearchAggregator {
ret.max_terms_number = q.split_whitespace().count(); ret.max_terms_number = q.split_whitespace().count();
} }
ret.optional_words
.insert(format!("{:?}", query.optional_words), 1);
ret.max_limit = query.limit; ret.max_limit = query.limit;
ret.max_offset = query.offset.unwrap_or_default(); ret.max_offset = query.offset.unwrap_or_default();
@@ -476,6 +482,11 @@ impl SearchAggregator {
} }
// q // q
self.max_terms_number = self.max_terms_number.max(other.max_terms_number); self.max_terms_number = self.max_terms_number.max(other.max_terms_number);
for (key, value) in other.optional_words.into_iter() {
let optional_words = self.optional_words.entry(key).or_insert(0);
*optional_words = optional_words.saturating_add(value);
}
// pagination // pagination
self.max_limit = self.max_limit.max(other.max_limit); self.max_limit = self.max_limit.max(other.max_limit);
self.max_offset = self.max_offset.max(other.max_offset); self.max_offset = self.max_offset.max(other.max_offset);
@@ -517,6 +528,7 @@ impl SearchAggregator {
}, },
"q": { "q": {
"max_terms_number": self.max_terms_number, "max_terms_number": self.max_terms_number,
"most_used_optional_words": self.optional_words.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
}, },
"pagination": { "pagination": {
"max_limit": self.max_limit, "max_limit": self.max_limit,

View File

@@ -2,8 +2,8 @@ use actix_web::{web, HttpRequest, HttpResponse};
use log::debug; use log::debug;
use meilisearch_auth::IndexSearchRules; use meilisearch_auth::IndexSearchRules;
use meilisearch_lib::index::{ use meilisearch_lib::index::{
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, OptionalWords, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
}; };
use meilisearch_lib::MeiliSearch; use meilisearch_lib::MeiliSearch;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
@@ -45,6 +45,8 @@ pub struct SearchQueryGet {
highlight_post_tag: String, highlight_post_tag: String,
#[serde(default = "DEFAULT_CROP_MARKER")] #[serde(default = "DEFAULT_CROP_MARKER")]
crop_marker: String, crop_marker: String,
#[serde(default)]
optional_words: OptionalWords,
} }
impl From<SearchQueryGet> for SearchQuery { impl From<SearchQueryGet> for SearchQuery {
@@ -76,6 +78,7 @@ impl From<SearchQueryGet> for SearchQuery {
highlight_pre_tag: other.highlight_pre_tag, highlight_pre_tag: other.highlight_pre_tag,
highlight_post_tag: other.highlight_post_tag, highlight_post_tag: other.highlight_post_tag,
crop_marker: other.crop_marker, crop_marker: other.crop_marker,
optional_words: other.optional_words,
} }
} }
} }

View File

@@ -28,7 +28,7 @@ lazy_static = "1.4.0"
log = "0.4.14" log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" } meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" } meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.31.1" } milli = { git = "https://github.com/meilisearch/milli.git", branch = "matching-query-terms-policy" }
mime = "0.3.16" mime = "0.3.16"
num_cpus = "1.13.1" num_cpus = "1.13.1"
obkv = "0.2.0" obkv = "0.2.0"

View File

@@ -25,6 +25,7 @@ impl ErrorCode for MilliError<'_> {
// TODO: wait for spec for new error codes. // TODO: wait for spec for new error codes.
UserError::SerdeJson(_) UserError::SerdeJson(_)
| UserError::DocumentLimitReached | UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal, | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStore, UserError::InvalidStoreFile => Code::InvalidStore,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice, UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,

View File

@@ -14,6 +14,8 @@ mod index;
pub use index::{Document, IndexMeta, IndexStats}; pub use index::{Document, IndexMeta, IndexStats};
pub use milli::OptionalWords;
#[cfg(not(test))] #[cfg(not(test))]
pub use index::Index; pub use index::Index;

View File

@@ -6,8 +6,8 @@ use std::time::Instant;
use either::Either; use either::Either;
use milli::tokenizer::TokenizerBuilder; use milli::tokenizer::TokenizerBuilder;
use milli::{ use milli::{
AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder, SortError, AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, MatchBounds, MatcherBuilder,
DEFAULT_VALUES_PER_FACET, OptionalWords, SortError, DEFAULT_VALUES_PER_FACET,
}; };
use regex::Regex; use regex::Regex;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
@@ -55,6 +55,8 @@ pub struct SearchQuery {
pub highlight_post_tag: String, pub highlight_post_tag: String,
#[serde(default = "DEFAULT_CROP_MARKER")] #[serde(default = "DEFAULT_CROP_MARKER")]
pub crop_marker: String, pub crop_marker: String,
#[serde(default)]
pub optional_words: OptionalWords,
} }
#[derive(Debug, Clone, Serialize, PartialEq)] #[derive(Debug, Clone, Serialize, PartialEq)]
@@ -91,6 +93,8 @@ impl Index {
search.query(query); search.query(query);
} }
search.optional_words(query.optional_words);
let max_total_hits = self let max_total_hits = self
.pagination_max_total_hits(&rtxn)? .pagination_max_total_hits(&rtxn)?
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);

View File

@@ -697,6 +697,7 @@ mod test {
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),
crop_marker: DEFAULT_CROP_MARKER(), crop_marker: DEFAULT_CROP_MARKER(),
optional_words: Default::default(),
}; };
let result = SearchResult { let result = SearchResult {