2383: v0.27.0: bring `stable` into `main` r=Kerollmops a=curquiza

Bring `stable` into `main`

Co-authored-by: ad hoc <postma.marin@protonmail.com>
Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com>
Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com>
Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Paul Sanders <psanders1@gmail.com>
Co-authored-by: Irevoire <tamo@meilisearch.com>
Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com>
Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
This commit is contained in:
bors[bot]
2022-05-16 08:35:25 +00:00
committed by GitHub
19 changed files with 1523 additions and 93 deletions

View File

@ -1,6 +1,6 @@
[package]
name = "meilisearch-lib"
version = "0.26.0"
version = "0.27.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@ -30,13 +30,13 @@ lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-error = { path = "../meilisearch-error" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
once_cell = "1.10.0"
parking_lot = "0.12.0"
permissive-json-pointer = "0.2.0"
permissive-json-pointer = { path = "../permissive-json-pointer" }
rand = "0.8.5"
rayon = "1.5.1"
regex = "1.5.5"

View File

@ -18,7 +18,7 @@ use crate::EnvSizer;
use super::error::IndexError;
use super::error::Result;
use super::updates::{MinWordLengthTypoSetting, TypoSettings};
use super::updates::{MinWordSizeTyposSetting, TypoSettings};
use super::{Checked, Settings};
pub type Document = Map<String, Value>;
@ -170,7 +170,7 @@ impl Index {
})
.collect();
let min_typo_word_len = MinWordLengthTypoSetting {
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(self.min_word_len_one_typo(txn)?),
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
};
@ -190,7 +190,7 @@ impl Index {
let typo_tolerance = TypoSettings {
enabled: Setting::Set(self.authorize_typos(txn)?),
min_word_length_for_typo: Setting::Set(min_typo_word_len),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
};
@ -213,7 +213,7 @@ impl Index {
None => Setting::Reset,
},
synonyms: Setting::Set(synonyms),
typo: Setting::Set(typo_tolerance),
typo_tolerance: Setting::Set(typo_tolerance),
_kind: PhantomData,
})
}

View File

@ -1,6 +1,7 @@
pub use search::{
default_crop_length, default_crop_marker, default_highlight_post_tag,
default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT,
default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};

View File

@ -4,7 +4,6 @@ use std::str::FromStr;
use std::time::Instant;
use either::Either;
use indexmap::IndexMap;
use milli::tokenizer::{Analyzer, AnalyzerConfig, Token};
use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError};
use regex::Regex;
@ -16,7 +15,7 @@ use crate::index::error::FacetError;
use super::error::{IndexError, Result};
use super::index::Index;
pub type Document = IndexMap<String, Value>;
pub type Document = serde_json::Map<String, Value>;
type MatchesInfo = BTreeMap<String, Vec<MatchInfo>>;
#[derive(Serialize, Debug, Clone, PartialEq)]
@ -35,17 +34,17 @@ pub const fn default_crop_length() -> usize {
DEFAULT_CROP_LENGTH
}
const DEFAULT_CROP_MARKER: &str = "";
pub const DEFAULT_CROP_MARKER: &str = "";
pub fn default_crop_marker() -> String {
DEFAULT_CROP_MARKER.to_string()
}
const DEFAULT_HIGHLIGHT_PRE_TAG: &str = "<em>";
pub const DEFAULT_HIGHLIGHT_PRE_TAG: &str = "<em>";
pub fn default_highlight_pre_tag() -> String {
DEFAULT_HIGHLIGHT_PRE_TAG.to_string()
}
const DEFAULT_HIGHLIGHT_POST_TAG: &str = "</em>";
pub const DEFAULT_HIGHLIGHT_POST_TAG: &str = "</em>";
pub fn default_highlight_post_tag() -> String {
DEFAULT_HIGHLIGHT_POST_TAG.to_string()
}
@ -233,14 +232,22 @@ impl Index {
let documents_iter = self.documents(&rtxn, documents_ids)?;
for (_id, obkv) in documents_iter {
let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
// First generate a document with all the displayed fields
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
// select the attributes to retrieve
let attributes_to_retrieve = to_retrieve_ids
.iter()
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
let mut document =
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
let matches_info = query
.matches
.then(|| compute_matches(&matching_words, &document, &analyzer));
let formatted = format_fields(
&document,
&displayed_document,
&fields_ids_map,
&formatter,
&matching_words,
@ -476,7 +483,7 @@ fn add_non_formatted_ids_to_formatted_options(
}
fn make_document(
attributes_to_retrieve: &BTreeSet<FieldId>,
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
) -> Result<Document> {
@ -494,15 +501,11 @@ fn make_document(
}
// select the attributes to retrieve
let attributes_to_retrieve = attributes_to_retrieve
let displayed_attributes = displayed_attributes
.iter()
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
let document = permissive_json_pointer::select_values(&document, attributes_to_retrieve);
// then we need to convert the `serde_json::Map` into an `IndexMap`.
let document = document.into_iter().collect();
let document = permissive_json_pointer::select_values(&document, displayed_attributes);
Ok(document)
}
@ -513,20 +516,13 @@ fn format_fields<A: AsRef<[u8]>>(
matching_words: &impl Matcher,
formatted_options: &BTreeMap<FieldId, FormatOptions>,
) -> Result<Document> {
// Convert the `IndexMap` into a `serde_json::Map`.
let document = document
.iter()
.map(|(k, v)| (k.clone(), v.clone()))
.collect();
let selectors: Vec<_> = formatted_options
.keys()
// This unwrap must be safe since we got the ids from the fields_ids_map just
// before.
.map(|&fid| field_ids_map.name(fid).unwrap())
.collect();
let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied());
let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied());
permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| {
// To get the formatting option of each key we need to see all the rules that apply
@ -542,13 +538,9 @@ fn format_fields<A: AsRef<[u8]>>(
.fold(FormatOptions::default(), |acc, (_, option)| {
acc.merge(*option)
});
// TODO: remove this useless clone
*value = formatter.format_value(value.clone(), matching_words, format);
*value = formatter.format_value(std::mem::take(value), matching_words, format);
});
// we need to convert back the `serde_json::Map` into an `IndexMap`.
let document = document.into_iter().collect();
Ok(document)
}

View File

@ -41,7 +41,7 @@ pub struct Unchecked;
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct MinWordLengthTypoSetting {
pub struct MinWordSizeTyposSetting {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub one_typo: Setting<u8>,
@ -60,7 +60,7 @@ pub struct TypoSettings {
pub enabled: Setting<bool>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub min_word_length_for_typo: Setting<MinWordLengthTypoSetting>,
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_words: Setting<BTreeSet<String>>,
@ -113,7 +113,7 @@ pub struct Settings<T> {
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
pub typo: Setting<TypoSettings>,
pub typo_tolerance: Setting<TypoSettings>,
#[serde(skip)]
pub _kind: PhantomData<T>,
@ -130,7 +130,7 @@ impl Settings<Checked> {
stop_words: Setting::Reset,
synonyms: Setting::Reset,
distinct_attribute: Setting::Reset,
typo: Setting::Reset,
typo_tolerance: Setting::Reset,
_kind: PhantomData,
}
}
@ -145,7 +145,7 @@ impl Settings<Checked> {
stop_words,
synonyms,
distinct_attribute,
typo: typo_tolerance,
typo_tolerance,
..
} = self;
@ -158,7 +158,7 @@ impl Settings<Checked> {
stop_words,
synonyms,
distinct_attribute,
typo: typo_tolerance,
typo_tolerance,
_kind: PhantomData,
}
}
@ -197,7 +197,7 @@ impl Settings<Unchecked> {
stop_words: self.stop_words,
synonyms: self.synonyms,
distinct_attribute: self.distinct_attribute,
typo: self.typo,
typo_tolerance: self.typo_tolerance,
_kind: PhantomData,
}
}
@ -373,7 +373,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match settings.typo {
match settings.typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
Setting::Set(val) => builder.set_autorize_typos(val),
@ -381,7 +381,7 @@ pub fn apply_settings_to_builder(
Setting::NotSet => (),
}
match value.min_word_length_for_typo {
match value.min_word_size_for_typos {
Setting::Set(ref setting) => {
match setting.one_typo {
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
@ -455,7 +455,7 @@ pub(crate) mod test {
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo: Setting::NotSet,
typo_tolerance: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};
@ -477,7 +477,7 @@ pub(crate) mod test {
stop_words: Setting::NotSet,
synonyms: Setting::NotSet,
distinct_attribute: Setting::NotSet,
typo: Setting::NotSet,
typo_tolerance: Setting::NotSet,
_kind: PhantomData::<Unchecked>,
};

View File

@ -178,15 +178,6 @@ impl IndexControllerBuilder {
.max_task_store_size
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
let db_exists = db_path.as_ref().exists();
if db_exists {
// The directory may have been pre-created without any database in it.
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
if !db_is_empty {
versioning::check_version_file(db_path.as_ref())?;
}
}
if let Some(ref path) = self.import_snapshot {
log::info!("Loading from snapshot {:?}", path);
load_snapshot(
@ -207,6 +198,15 @@ impl IndexControllerBuilder {
)?;
}
let db_exists = db_path.as_ref().exists();
if db_exists {
// The directory may have been pre-created without any database in it.
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
if !db_is_empty {
versioning::check_version_file(db_path.as_ref())?;
}
}
std::fs::create_dir_all(db_path.as_ref())?;
let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);