mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-07-26 16:21:07 +00:00
Merge #2383
2383: v0.27.0: bring `stable` into `main` r=Kerollmops a=curquiza Bring `stable` into `main` Co-authored-by: ad hoc <postma.marin@protonmail.com> Co-authored-by: Clémentine Urquizar <clementine@meilisearch.com> Co-authored-by: bors[bot] <26634292+bors[bot]@users.noreply.github.com> Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Tamo <tamo@meilisearch.com> Co-authored-by: Paul Sanders <psanders1@gmail.com> Co-authored-by: Irevoire <tamo@meilisearch.com> Co-authored-by: Morgane Dubus <30866152+mdubus@users.noreply.github.com> Co-authored-by: Guillaume Mourier <guillaume@meilisearch.com>
This commit is contained in:
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "meilisearch-lib"
|
||||
version = "0.26.0"
|
||||
version = "0.27.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
@ -30,13 +30,13 @@ lazy_static = "1.4.0"
|
||||
log = "0.4.14"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-error = { path = "../meilisearch-error" }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.0" }
|
||||
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.26.4" }
|
||||
mime = "0.3.16"
|
||||
num_cpus = "1.13.1"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.10.0"
|
||||
parking_lot = "0.12.0"
|
||||
permissive-json-pointer = "0.2.0"
|
||||
permissive-json-pointer = { path = "../permissive-json-pointer" }
|
||||
rand = "0.8.5"
|
||||
rayon = "1.5.1"
|
||||
regex = "1.5.5"
|
||||
|
@ -18,7 +18,7 @@ use crate::EnvSizer;
|
||||
|
||||
use super::error::IndexError;
|
||||
use super::error::Result;
|
||||
use super::updates::{MinWordLengthTypoSetting, TypoSettings};
|
||||
use super::updates::{MinWordSizeTyposSetting, TypoSettings};
|
||||
use super::{Checked, Settings};
|
||||
|
||||
pub type Document = Map<String, Value>;
|
||||
@ -170,7 +170,7 @@ impl Index {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let min_typo_word_len = MinWordLengthTypoSetting {
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(self.min_word_len_one_typo(txn)?),
|
||||
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
|
||||
};
|
||||
@ -190,7 +190,7 @@ impl Index {
|
||||
|
||||
let typo_tolerance = TypoSettings {
|
||||
enabled: Setting::Set(self.authorize_typos(txn)?),
|
||||
min_word_length_for_typo: Setting::Set(min_typo_word_len),
|
||||
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||
disable_on_words: Setting::Set(disabled_words),
|
||||
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||
};
|
||||
@ -213,7 +213,7 @@ impl Index {
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo: Setting::Set(typo_tolerance),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
pub use search::{
|
||||
default_crop_length, default_crop_marker, default_highlight_post_tag,
|
||||
default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT,
|
||||
default_highlight_pre_tag, SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
|
||||
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
|
||||
};
|
||||
pub use updates::{apply_settings_to_builder, Checked, Facets, Settings, Unchecked};
|
||||
|
||||
|
@ -4,7 +4,6 @@ use std::str::FromStr;
|
||||
use std::time::Instant;
|
||||
|
||||
use either::Either;
|
||||
use indexmap::IndexMap;
|
||||
use milli::tokenizer::{Analyzer, AnalyzerConfig, Token};
|
||||
use milli::{AscDesc, FieldId, FieldsIdsMap, Filter, MatchingWords, SortError};
|
||||
use regex::Regex;
|
||||
@ -16,7 +15,7 @@ use crate::index::error::FacetError;
|
||||
use super::error::{IndexError, Result};
|
||||
use super::index::Index;
|
||||
|
||||
pub type Document = IndexMap<String, Value>;
|
||||
pub type Document = serde_json::Map<String, Value>;
|
||||
type MatchesInfo = BTreeMap<String, Vec<MatchInfo>>;
|
||||
|
||||
#[derive(Serialize, Debug, Clone, PartialEq)]
|
||||
@ -35,17 +34,17 @@ pub const fn default_crop_length() -> usize {
|
||||
DEFAULT_CROP_LENGTH
|
||||
}
|
||||
|
||||
const DEFAULT_CROP_MARKER: &str = "…";
|
||||
pub const DEFAULT_CROP_MARKER: &str = "…";
|
||||
pub fn default_crop_marker() -> String {
|
||||
DEFAULT_CROP_MARKER.to_string()
|
||||
}
|
||||
|
||||
const DEFAULT_HIGHLIGHT_PRE_TAG: &str = "<em>";
|
||||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: &str = "<em>";
|
||||
pub fn default_highlight_pre_tag() -> String {
|
||||
DEFAULT_HIGHLIGHT_PRE_TAG.to_string()
|
||||
}
|
||||
|
||||
const DEFAULT_HIGHLIGHT_POST_TAG: &str = "</em>";
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: &str = "</em>";
|
||||
pub fn default_highlight_post_tag() -> String {
|
||||
DEFAULT_HIGHLIGHT_POST_TAG.to_string()
|
||||
}
|
||||
@ -233,14 +232,22 @@ impl Index {
|
||||
let documents_iter = self.documents(&rtxn, documents_ids)?;
|
||||
|
||||
for (_id, obkv) in documents_iter {
|
||||
let mut document = make_document(&to_retrieve_ids, &fields_ids_map, obkv)?;
|
||||
// First generate a document with all the displayed fields
|
||||
let displayed_document = make_document(&displayed_ids, &fields_ids_map, obkv)?;
|
||||
|
||||
// select the attributes to retrieve
|
||||
let attributes_to_retrieve = to_retrieve_ids
|
||||
.iter()
|
||||
.map(|&fid| fields_ids_map.name(fid).expect("Missing field name"));
|
||||
let mut document =
|
||||
permissive_json_pointer::select_values(&displayed_document, attributes_to_retrieve);
|
||||
|
||||
let matches_info = query
|
||||
.matches
|
||||
.then(|| compute_matches(&matching_words, &document, &analyzer));
|
||||
|
||||
let formatted = format_fields(
|
||||
&document,
|
||||
&displayed_document,
|
||||
&fields_ids_map,
|
||||
&formatter,
|
||||
&matching_words,
|
||||
@ -476,7 +483,7 @@ fn add_non_formatted_ids_to_formatted_options(
|
||||
}
|
||||
|
||||
fn make_document(
|
||||
attributes_to_retrieve: &BTreeSet<FieldId>,
|
||||
displayed_attributes: &BTreeSet<FieldId>,
|
||||
field_ids_map: &FieldsIdsMap,
|
||||
obkv: obkv::KvReaderU16,
|
||||
) -> Result<Document> {
|
||||
@ -494,15 +501,11 @@ fn make_document(
|
||||
}
|
||||
|
||||
// select the attributes to retrieve
|
||||
let attributes_to_retrieve = attributes_to_retrieve
|
||||
let displayed_attributes = displayed_attributes
|
||||
.iter()
|
||||
.map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
|
||||
|
||||
let document = permissive_json_pointer::select_values(&document, attributes_to_retrieve);
|
||||
|
||||
// then we need to convert the `serde_json::Map` into an `IndexMap`.
|
||||
let document = document.into_iter().collect();
|
||||
|
||||
let document = permissive_json_pointer::select_values(&document, displayed_attributes);
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
@ -513,20 +516,13 @@ fn format_fields<A: AsRef<[u8]>>(
|
||||
matching_words: &impl Matcher,
|
||||
formatted_options: &BTreeMap<FieldId, FormatOptions>,
|
||||
) -> Result<Document> {
|
||||
// Convert the `IndexMap` into a `serde_json::Map`.
|
||||
let document = document
|
||||
.iter()
|
||||
.map(|(k, v)| (k.clone(), v.clone()))
|
||||
.collect();
|
||||
|
||||
let selectors: Vec<_> = formatted_options
|
||||
.keys()
|
||||
// This unwrap must be safe since we got the ids from the fields_ids_map just
|
||||
// before.
|
||||
.map(|&fid| field_ids_map.name(fid).unwrap())
|
||||
.collect();
|
||||
|
||||
let mut document = permissive_json_pointer::select_values(&document, selectors.iter().copied());
|
||||
let mut document = permissive_json_pointer::select_values(document, selectors.iter().copied());
|
||||
|
||||
permissive_json_pointer::map_leaf_values(&mut document, selectors, |key, value| {
|
||||
// To get the formatting option of each key we need to see all the rules that applies
|
||||
@ -542,13 +538,9 @@ fn format_fields<A: AsRef<[u8]>>(
|
||||
.fold(FormatOptions::default(), |acc, (_, option)| {
|
||||
acc.merge(*option)
|
||||
});
|
||||
// TODO: remove this useless clone
|
||||
*value = formatter.format_value(value.clone(), matching_words, format);
|
||||
*value = formatter.format_value(std::mem::take(value), matching_words, format);
|
||||
});
|
||||
|
||||
// we need to convert back the `serde_json::Map` into an `IndexMap`.
|
||||
let document = document.into_iter().collect();
|
||||
|
||||
Ok(document)
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,7 @@ pub struct Unchecked;
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MinWordLengthTypoSetting {
|
||||
pub struct MinWordSizeTyposSetting {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub one_typo: Setting<u8>,
|
||||
@ -60,7 +60,7 @@ pub struct TypoSettings {
|
||||
pub enabled: Setting<bool>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub min_word_length_for_typo: Setting<MinWordLengthTypoSetting>,
|
||||
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
pub disable_on_words: Setting<BTreeSet<String>>,
|
||||
@ -113,7 +113,7 @@ pub struct Settings<T> {
|
||||
pub distinct_attribute: Setting<String>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
pub typo: Setting<TypoSettings>,
|
||||
pub typo_tolerance: Setting<TypoSettings>,
|
||||
|
||||
#[serde(skip)]
|
||||
pub _kind: PhantomData<T>,
|
||||
@ -130,7 +130,7 @@ impl Settings<Checked> {
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
typo: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -145,7 +145,7 @@ impl Settings<Checked> {
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo: typo_tolerance,
|
||||
typo_tolerance,
|
||||
..
|
||||
} = self;
|
||||
|
||||
@ -158,7 +158,7 @@ impl Settings<Checked> {
|
||||
stop_words,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo: typo_tolerance,
|
||||
typo_tolerance,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -197,7 +197,7 @@ impl Settings<Unchecked> {
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
typo: self.typo,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
_kind: PhantomData,
|
||||
}
|
||||
}
|
||||
@ -373,7 +373,7 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.typo {
|
||||
match settings.typo_tolerance {
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
||||
@ -381,7 +381,7 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match value.min_word_length_for_typo {
|
||||
match value.min_word_size_for_typos {
|
||||
Setting::Set(ref setting) => {
|
||||
match setting.one_typo {
|
||||
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
|
||||
@ -455,7 +455,7 @@ pub(crate) mod test {
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
@ -477,7 +477,7 @@ pub(crate) mod test {
|
||||
stop_words: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
_kind: PhantomData::<Unchecked>,
|
||||
};
|
||||
|
||||
|
@ -178,15 +178,6 @@ impl IndexControllerBuilder {
|
||||
.max_task_store_size
|
||||
.ok_or_else(|| anyhow::anyhow!("Missing update database size"))?;
|
||||
|
||||
let db_exists = db_path.as_ref().exists();
|
||||
if db_exists {
|
||||
// Directory could be pre-created without any database in.
|
||||
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
|
||||
if !db_is_empty {
|
||||
versioning::check_version_file(db_path.as_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(ref path) = self.import_snapshot {
|
||||
log::info!("Loading from snapshot {:?}", path);
|
||||
load_snapshot(
|
||||
@ -207,6 +198,15 @@ impl IndexControllerBuilder {
|
||||
)?;
|
||||
}
|
||||
|
||||
let db_exists = db_path.as_ref().exists();
|
||||
if db_exists {
|
||||
// Directory could be pre-created without any database in.
|
||||
let db_is_empty = db_path.as_ref().read_dir()?.next().is_none();
|
||||
if !db_is_empty {
|
||||
versioning::check_version_file(db_path.as_ref())?;
|
||||
}
|
||||
}
|
||||
|
||||
std::fs::create_dir_all(db_path.as_ref())?;
|
||||
|
||||
let meta_env = Arc::new(open_meta_env(db_path.as_ref(), task_store_size)?);
|
||||
|
Reference in New Issue
Block a user