mirror of
https://github.com/meilisearch/meilisearch.git
synced 2025-12-10 22:55:43 +00:00
Compare commits
57 Commits
nightly-te
...
prototype-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8c9367686 | ||
|
|
e9b62aacb3 | ||
|
|
456960d2c7 | ||
|
|
3dda176723 | ||
|
|
af0f6f0bf0 | ||
|
|
ccf3ba3f32 | ||
|
|
65528a3e06 | ||
|
|
cdb4b3e024 | ||
|
|
8c0ebd1331 | ||
|
|
5130e06b41 | ||
|
|
08e27ef73f | ||
|
|
914b125c5f | ||
|
|
717b069907 | ||
|
|
7ea154673a | ||
|
|
b947f3bb9d | ||
|
|
4c35817c5f | ||
|
|
c53841e166 | ||
|
|
fd81945597 | ||
|
|
794e491152 | ||
|
|
cab27c2ab4 | ||
|
|
624fa9052f | ||
|
|
359ede4862 | ||
|
|
60c11dbdbd | ||
|
|
dacee40ebc | ||
|
|
6089083a8e | ||
|
|
cc2c19d4c3 | ||
|
|
a5c56fac8a | ||
|
|
e4e49e63d0 | ||
|
|
00bd7bd19a | ||
|
|
ef3d098b4d | ||
|
|
8084cf29f3 | ||
|
|
5a7c1bde84 | ||
|
|
6b2d671be7 | ||
|
|
43c13faeda | ||
|
|
064ee95b1c | ||
|
|
44c1900f36 | ||
|
|
04671d0751 | ||
|
|
4f4c669d50 | ||
|
|
35758db9ec | ||
|
|
4988199bb9 | ||
|
|
83991ee770 | ||
|
|
9d061cec26 | ||
|
|
4a21fecf67 | ||
|
|
ae8e69c030 | ||
|
|
fe819a9d80 | ||
|
|
e338ceb97f | ||
|
|
75c87d5391 | ||
|
|
dd57873f8e | ||
|
|
9d5e3457e5 | ||
|
|
04694071fe | ||
|
|
b0c1a9504a | ||
|
|
d57026cd96 | ||
|
|
41c9e8856a | ||
|
|
d4ff59fcf5 | ||
|
|
9c485f8563 | ||
|
|
d8d12d5979 | ||
|
|
0597a97c84 |
12
.github/workflows/test-suite.yml
vendored
12
.github/workflows/test-suite.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo check without any default features
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
toolchain: stable
|
||||
override: true
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run tests in debug
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -161,11 +161,11 @@ jobs:
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
profile: minimal
|
||||
toolchain: 1.69.0
|
||||
toolchain: 1.71.1
|
||||
override: true
|
||||
components: clippy
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo clippy
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
override: true
|
||||
components: rustfmt
|
||||
- name: Cache dependencies
|
||||
uses: Swatinem/rust-cache@v2.5.1
|
||||
uses: Swatinem/rust-cache@v2.6.2
|
||||
- name: Run cargo fmt
|
||||
# Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
|
||||
# Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
|
||||
|
||||
662
Cargo.lock
generated
662
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -18,7 +18,7 @@ members = [
|
||||
]
|
||||
|
||||
[workspace.package]
|
||||
version = "1.3.0"
|
||||
version = "1.4.0"
|
||||
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
|
||||
description = "Meilisearch HTTP server"
|
||||
homepage = "https://meilisearch.com"
|
||||
|
||||
@@ -262,6 +262,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::Set(btreeset! { S("age") }),
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
|
||||
@@ -340,6 +340,9 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
|
||||
}
|
||||
},
|
||||
stop_words: settings.stop_words.into(),
|
||||
non_separator_tokens: v6::Setting::NotSet,
|
||||
separator_tokens: v6::Setting::NotSet,
|
||||
dictionary: v6::Setting::NotSet,
|
||||
synonyms: settings.synonyms.into(),
|
||||
distinct_attribute: settings.distinct_attribute.into(),
|
||||
typo_tolerance: match settings.typo_tolerance {
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: spells.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: products.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [],
|
||||
"sortableAttributes": [],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {
|
||||
"android": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"iphone": [
|
||||
"phone",
|
||||
"smartphone"
|
||||
],
|
||||
"phone": [
|
||||
"android",
|
||||
"iphone",
|
||||
"smartphone"
|
||||
]
|
||||
},
|
||||
"distinctAttribute": null
|
||||
}
|
||||
@@ -0,0 +1,31 @@
|
||||
---
|
||||
source: dump/src/reader/mod.rs
|
||||
expression: movies.settings().unwrap()
|
||||
---
|
||||
{
|
||||
"displayedAttributes": [
|
||||
"*"
|
||||
],
|
||||
"searchableAttributes": [
|
||||
"*"
|
||||
],
|
||||
"filterableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"sortableAttributes": [
|
||||
"genres",
|
||||
"id"
|
||||
],
|
||||
"rankingRules": [
|
||||
"typo",
|
||||
"words",
|
||||
"proximity",
|
||||
"attribute",
|
||||
"exactness",
|
||||
"release_date:asc"
|
||||
],
|
||||
"stopWords": [],
|
||||
"synonyms": {},
|
||||
"distinctAttribute": null
|
||||
}
|
||||
@@ -13,7 +13,7 @@ license.workspace = true
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||
clap = { version = "4.3.0", features = ["derive"] }
|
||||
fastrand = "1.9.0"
|
||||
fastrand = "2.0.0"
|
||||
milli = { path = "../milli" }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
|
||||
@@ -790,10 +790,19 @@ impl IndexScheduler {
|
||||
|
||||
let mut res = BTreeMap::new();
|
||||
|
||||
let processing_tasks = { self.processing_tasks.read().unwrap().processing.len() };
|
||||
|
||||
res.insert(
|
||||
"statuses".to_string(),
|
||||
enum_iterator::all::<Status>()
|
||||
.map(|s| Ok((s.to_string(), self.get_status(&rtxn, s)?.len())))
|
||||
.map(|s| {
|
||||
let tasks = self.get_status(&rtxn, s)?.len();
|
||||
match s {
|
||||
Status::Enqueued => Ok((s.to_string(), tasks - processing_tasks)),
|
||||
Status::Processing => Ok((s.to_string(), processing_tasks)),
|
||||
s => Ok((s.to_string(), tasks)),
|
||||
}
|
||||
})
|
||||
.collect::<Result<BTreeMap<String, u64>>>()?,
|
||||
);
|
||||
res.insert(
|
||||
@@ -4131,4 +4140,154 @@ mod tests {
|
||||
snapshot!(json_string!(tasks, { "[].enqueuedAt" => "[date]", "[].startedAt" => "[date]", "[].finishedAt" => "[date]", ".**.original_filter" => "[filter]", ".**.query" => "[query]" }), name: "everything_has_been_processed");
|
||||
drop(rtxn);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_get_stats() {
|
||||
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
|
||||
|
||||
let kind = index_creation_task("catto", "mouse");
|
||||
let _task = index_scheduler.register(kind).unwrap();
|
||||
let kind = index_creation_task("doggo", "sheep");
|
||||
let _task = index_scheduler.register(kind).unwrap();
|
||||
let kind = index_creation_task("whalo", "fish");
|
||||
let _task = index_scheduler.register(kind).unwrap();
|
||||
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 3,
|
||||
"failed": 0,
|
||||
"processing": 0,
|
||||
"succeeded": 0
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
handle.advance_till([Start, BatchCreated]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 2,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 0
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 1,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 1
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
|
||||
// now we make one more batch, the started_at field of the new tasks will be past `second_start_time`
|
||||
handle.advance_till([
|
||||
InsideProcessBatch,
|
||||
InsideProcessBatch,
|
||||
ProcessBatchSucceeded,
|
||||
AfterProcessing,
|
||||
Start,
|
||||
BatchCreated,
|
||||
]);
|
||||
snapshot!(json_string!(index_scheduler.get_stats().unwrap()), @r###"
|
||||
{
|
||||
"indexes": {
|
||||
"catto": 1,
|
||||
"doggo": 1,
|
||||
"whalo": 1
|
||||
},
|
||||
"statuses": {
|
||||
"canceled": 0,
|
||||
"enqueued": 0,
|
||||
"failed": 0,
|
||||
"processing": 1,
|
||||
"succeeded": 2
|
||||
},
|
||||
"types": {
|
||||
"documentAdditionOrUpdate": 0,
|
||||
"documentDeletion": 0,
|
||||
"dumpCreation": 0,
|
||||
"indexCreation": 3,
|
||||
"indexDeletion": 0,
|
||||
"indexSwap": 0,
|
||||
"indexUpdate": 0,
|
||||
"settingsUpdate": 0,
|
||||
"snapshotCreation": 0,
|
||||
"taskCancelation": 0,
|
||||
"taskDeletion": 0
|
||||
}
|
||||
}
|
||||
"###);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -167,7 +167,9 @@ macro_rules! snapshot {
|
||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, Some(&snap_name));
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
}
|
||||
});
|
||||
};
|
||||
($value:expr, @$inline:literal) => {
|
||||
@@ -176,7 +178,9 @@ macro_rules! snapshot {
|
||||
let (settings, _, _) = $crate::default_snapshot_settings_for_test("", Some("_dummy_argument"));
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(snap, @$inline);
|
||||
}
|
||||
});
|
||||
};
|
||||
($value:expr) => {
|
||||
@@ -194,7 +198,9 @@ macro_rules! snapshot {
|
||||
let (settings, snap_name, _) = $crate::default_snapshot_settings_for_test(test_name, None);
|
||||
settings.bind(|| {
|
||||
let snap = format!("{}", $value);
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
insta::allow_duplicates! {
|
||||
meili_snap::insta::assert_snapshot!(format!("{}", snap_name), snap);
|
||||
}
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
@@ -129,6 +129,9 @@ impl HeedAuthStore {
|
||||
Action::DumpsAll => {
|
||||
actions.insert(Action::DumpsCreate);
|
||||
}
|
||||
Action::SnapshotsAll => {
|
||||
actions.insert(Action::SnapshotsCreate);
|
||||
}
|
||||
Action::TasksAll => {
|
||||
actions.extend([Action::TasksGet, Action::TasksDelete, Action::TasksCancel]);
|
||||
}
|
||||
|
||||
@@ -15,13 +15,13 @@ actix-web = { version = "4.3.1", default-features = false }
|
||||
anyhow = "1.0.70"
|
||||
convert_case = "0.6.0"
|
||||
csv = "1.2.1"
|
||||
deserr = "0.5.0"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
enum-iterator = "1.4.0"
|
||||
file-store = { path = "../file-store" }
|
||||
flate2 = "1.0.25"
|
||||
fst = "0.4.7"
|
||||
memmap2 = "0.5.10"
|
||||
memmap2 = "0.7.1"
|
||||
milli = { path = "../milli" }
|
||||
roaring = { version = "0.10.1", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
use std::borrow::Borrow;
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::fs::File;
|
||||
use std::io::{self, Seek, Write};
|
||||
@@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
Self::Io(e) => write!(f, "{e}"),
|
||||
Self::MalformedPayload(me, b) => match me.borrow() {
|
||||
Self::MalformedPayload(me, b) => match me {
|
||||
Error::Json(se) => {
|
||||
let mut message = match se.classify() {
|
||||
Category::Data => {
|
||||
|
||||
@@ -259,6 +259,9 @@ InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSortableAttributes , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsStopWords , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsNonSeparatorTokens , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ;
|
||||
InvalidState , Internal , INTERNAL_SERVER_ERROR ;
|
||||
|
||||
@@ -257,6 +257,12 @@ pub enum Action {
|
||||
#[serde(rename = "dumps.create")]
|
||||
#[deserr(rename = "dumps.create")]
|
||||
DumpsCreate,
|
||||
#[serde(rename = "snapshots.*")]
|
||||
#[deserr(rename = "snapshots.*")]
|
||||
SnapshotsAll,
|
||||
#[serde(rename = "snapshots.create")]
|
||||
#[deserr(rename = "snapshots.create")]
|
||||
SnapshotsCreate,
|
||||
#[serde(rename = "version")]
|
||||
#[deserr(rename = "version")]
|
||||
Version,
|
||||
@@ -309,6 +315,7 @@ impl Action {
|
||||
METRICS_GET => Some(Self::MetricsGet),
|
||||
DUMPS_ALL => Some(Self::DumpsAll),
|
||||
DUMPS_CREATE => Some(Self::DumpsCreate),
|
||||
SNAPSHOTS_CREATE => Some(Self::SnapshotsCreate),
|
||||
VERSION => Some(Self::Version),
|
||||
KEYS_CREATE => Some(Self::KeysAdd),
|
||||
KEYS_GET => Some(Self::KeysGet),
|
||||
@@ -353,6 +360,7 @@ pub mod actions {
|
||||
pub const METRICS_GET: u8 = MetricsGet.repr();
|
||||
pub const DUMPS_ALL: u8 = DumpsAll.repr();
|
||||
pub const DUMPS_CREATE: u8 = DumpsCreate.repr();
|
||||
pub const SNAPSHOTS_CREATE: u8 = SnapshotsCreate.repr();
|
||||
pub const VERSION: u8 = Version.repr();
|
||||
pub const KEYS_CREATE: u8 = KeysAdd.repr();
|
||||
pub const KEYS_GET: u8 = KeysGet.repr();
|
||||
|
||||
@@ -171,6 +171,15 @@ pub struct Settings<T> {
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
|
||||
pub stop_words: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
|
||||
pub non_separator_tokens: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
|
||||
pub separator_tokens: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
|
||||
pub dictionary: Setting<BTreeSet<String>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
#[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
|
||||
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
@@ -201,6 +210,9 @@ impl Settings<Checked> {
|
||||
ranking_rules: Setting::Reset,
|
||||
stop_words: Setting::Reset,
|
||||
synonyms: Setting::Reset,
|
||||
non_separator_tokens: Setting::Reset,
|
||||
separator_tokens: Setting::Reset,
|
||||
dictionary: Setting::Reset,
|
||||
distinct_attribute: Setting::Reset,
|
||||
typo_tolerance: Setting::Reset,
|
||||
faceting: Setting::Reset,
|
||||
@@ -217,6 +229,9 @@ impl Settings<Checked> {
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
@@ -232,6 +247,9 @@ impl Settings<Checked> {
|
||||
sortable_attributes,
|
||||
ranking_rules,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
distinct_attribute,
|
||||
typo_tolerance,
|
||||
@@ -274,6 +292,9 @@ impl Settings<Unchecked> {
|
||||
ranking_rules: self.ranking_rules,
|
||||
stop_words: self.stop_words,
|
||||
synonyms: self.synonyms,
|
||||
non_separator_tokens: self.non_separator_tokens,
|
||||
separator_tokens: self.separator_tokens,
|
||||
dictionary: self.dictionary,
|
||||
distinct_attribute: self.distinct_attribute,
|
||||
typo_tolerance: self.typo_tolerance,
|
||||
faceting: self.faceting,
|
||||
@@ -335,6 +356,28 @@ pub fn apply_settings_to_builder(
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.non_separator_tokens {
|
||||
Setting::Set(ref non_separator_tokens) => {
|
||||
builder.set_non_separator_tokens(non_separator_tokens.clone())
|
||||
}
|
||||
Setting::Reset => builder.reset_non_separator_tokens(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.separator_tokens {
|
||||
Setting::Set(ref separator_tokens) => {
|
||||
builder.set_separator_tokens(separator_tokens.clone())
|
||||
}
|
||||
Setting::Reset => builder.reset_separator_tokens(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.dictionary {
|
||||
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
|
||||
Setting::Reset => builder.reset_dictionary(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
|
||||
Setting::Reset => builder.reset_synonyms(),
|
||||
@@ -459,15 +502,14 @@ pub fn settings(
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
|
||||
let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
|
||||
let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
|
||||
let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
|
||||
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = index
|
||||
.synonyms(rtxn)?
|
||||
.iter()
|
||||
.map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
|
||||
.collect();
|
||||
let synonyms = index.user_defined_synonyms(rtxn)?;
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||
@@ -520,6 +562,9 @@ pub fn settings(
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
non_separator_tokens: Setting::Set(non_separator_tokens),
|
||||
separator_tokens: Setting::Set(separator_tokens),
|
||||
dictionary: Setting::Set(dictionary),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
@@ -642,6 +687,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
@@ -663,6 +711,9 @@ pub(crate) mod test {
|
||||
sortable_attributes: Setting::NotSet,
|
||||
ranking_rules: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
distinct_attribute: Setting::NotSet,
|
||||
typo_tolerance: Setting::NotSet,
|
||||
|
||||
@@ -39,7 +39,7 @@ byte-unit = { version = "4.0.19", default-features = false, features = [
|
||||
bytes = "1.4.0"
|
||||
clap = { version = "4.2.1", features = ["derive", "env"] }
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
dump = { path = "../dump" }
|
||||
either = "1.8.1"
|
||||
env_logger = "0.10.0"
|
||||
@@ -50,9 +50,9 @@ futures = "0.3.28"
|
||||
futures-util = "0.3.28"
|
||||
http = "0.2.9"
|
||||
index-scheduler = { path = "../index-scheduler" }
|
||||
indexmap = { version = "1.9.3", features = ["serde-1"] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
is-terminal = "0.4.8"
|
||||
itertools = "0.10.5"
|
||||
itertools = "0.11.0"
|
||||
jsonwebtoken = "8.3.0"
|
||||
lazy_static = "1.4.0"
|
||||
log = "0.4.17"
|
||||
@@ -87,7 +87,7 @@ sha2 = "0.10.6"
|
||||
siphasher = "0.3.10"
|
||||
slice-group-by = "0.3.0"
|
||||
static-files = { version = "0.2.3", optional = true }
|
||||
sysinfo = "0.28.4"
|
||||
sysinfo = "0.29.7"
|
||||
tar = "0.4.38"
|
||||
tempfile = "3.5.0"
|
||||
thiserror = "1.0.40"
|
||||
|
||||
@@ -20,7 +20,7 @@ pub struct SearchAggregator;
|
||||
#[allow(dead_code)]
|
||||
impl SearchAggregator {
|
||||
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
|
||||
Self::default()
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, _: &dyn Any) {}
|
||||
@@ -32,7 +32,7 @@ pub struct MultiSearchAggregator;
|
||||
#[allow(dead_code)]
|
||||
impl MultiSearchAggregator {
|
||||
pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
|
||||
Self::default()
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self) {}
|
||||
@@ -44,7 +44,7 @@ pub struct FacetSearchAggregator;
|
||||
#[allow(dead_code)]
|
||||
impl FacetSearchAggregator {
|
||||
pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
|
||||
Self::default()
|
||||
Self
|
||||
}
|
||||
|
||||
pub fn succeed(&mut self, _: &dyn Any) {}
|
||||
|
||||
@@ -310,6 +310,81 @@ make_setting_route!(
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/non-separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
|
||||
>,
|
||||
non_separator_tokens,
|
||||
"nonSeparatorTokens",
|
||||
analytics,
|
||||
|non_separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"nonSeparatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"non_separator_tokens": {
|
||||
"total": non_separator_tokens.as_ref().map(|non_separator_tokens| non_separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/separator-tokens",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
|
||||
>,
|
||||
separator_tokens,
|
||||
"separatorTokens",
|
||||
analytics,
|
||||
|separator_tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"separatorTokens Updated".to_string(),
|
||||
json!({
|
||||
"separator_tokens": {
|
||||
"total": separator_tokens.as_ref().map(|separator_tokens| separator_tokens.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/dictionary",
|
||||
put,
|
||||
std::collections::BTreeSet<String>,
|
||||
meilisearch_types::deserr::DeserrJsonError<
|
||||
meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
|
||||
>,
|
||||
dictionary,
|
||||
"dictionary",
|
||||
analytics,
|
||||
|dictionary: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
|
||||
use serde_json::json;
|
||||
|
||||
analytics.publish(
|
||||
"dictionary Updated".to_string(),
|
||||
json!({
|
||||
"dictionary": {
|
||||
"total": dictionary.as_ref().map(|dictionary| dictionary.len()),
|
||||
},
|
||||
}),
|
||||
Some(req),
|
||||
);
|
||||
}
|
||||
);
|
||||
|
||||
make_setting_route!(
|
||||
"/synonyms",
|
||||
put,
|
||||
@@ -466,6 +541,9 @@ generate_configure!(
|
||||
searchable_attributes,
|
||||
distinct_attribute,
|
||||
stop_words,
|
||||
separator_tokens,
|
||||
non_separator_tokens,
|
||||
dictionary,
|
||||
synonyms,
|
||||
ranking_rules,
|
||||
typo_tolerance,
|
||||
|
||||
@@ -24,6 +24,7 @@ pub mod features;
|
||||
pub mod indexes;
|
||||
mod metrics;
|
||||
mod multi_search;
|
||||
mod snapshot;
|
||||
mod swap_indexes;
|
||||
pub mod tasks;
|
||||
|
||||
@@ -32,6 +33,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
.service(web::resource("/health").route(web::get().to(get_health)))
|
||||
.service(web::scope("/keys").configure(api_key::configure))
|
||||
.service(web::scope("/dumps").configure(dump::configure))
|
||||
.service(web::scope("/snapshots").configure(snapshot::configure))
|
||||
.service(web::resource("/stats").route(web::get().to(get_stats)))
|
||||
.service(web::resource("/version").route(web::get().to(get_version)))
|
||||
.service(web::scope("/indexes").configure(indexes::configure))
|
||||
|
||||
32
meilisearch/src/routes/snapshot.rs
Normal file
32
meilisearch/src/routes/snapshot.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
use actix_web::web::Data;
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::policies::*;
|
||||
use crate::extractors::authentication::GuardedData;
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_snapshot))));
|
||||
}
|
||||
|
||||
pub async fn create_snapshot(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::SNAPSHOTS_CREATE }>, Data<IndexScheduler>>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish("Snapshot Created".to_string(), json!({}), Some(&req));
|
||||
|
||||
let task = KindWithContent::SnapshotCreation;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
@@ -491,6 +491,20 @@ pub fn perform_search(
|
||||
tokenizer_builder.allow_list(&script_lang_map);
|
||||
}
|
||||
|
||||
let separators = index.allowed_separators(&rtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref separators) = separators {
|
||||
tokenizer_builder.separators(separators);
|
||||
}
|
||||
|
||||
let dictionary = index.dictionary(&rtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref dictionary) = dictionary {
|
||||
tokenizer_builder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
|
||||
formatter_builder.crop_marker(query.crop_marker);
|
||||
formatter_builder.highlight_prefix(query.highlight_pre_tag);
|
||||
@@ -666,6 +680,7 @@ fn compute_semantic_score(query: &[f32], vectors: Value) -> milli::Result<Option
|
||||
.map_err(InternalError::SerdeJson)?;
|
||||
Ok(vectors
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.map(|v| OrderedFloat(dot_product_similarity(query, &v)))
|
||||
.max()
|
||||
.map(OrderedFloat::into_inner))
|
||||
|
||||
@@ -156,6 +156,10 @@ impl Server {
|
||||
self.service.post("/dumps", json!(null)).await
|
||||
}
|
||||
|
||||
pub async fn create_snapshot(&self) -> (Value, StatusCode) {
|
||||
self.service.post("/snapshots", json!(null)).await
|
||||
}
|
||||
|
||||
pub async fn index_swap(&self, value: Value) -> (Value, StatusCode) {
|
||||
self.service.post("/swap-indexes", value).await
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,3 +1,4 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use once_cell::sync::Lazy;
|
||||
use serde_json::{json, Value};
|
||||
|
||||
@@ -60,3 +61,59 @@ async fn geo_sort_with_geo_strings() {
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn geo_bounding_box_with_string_and_number() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let documents = DOCUMENTS.clone();
|
||||
index.update_settings_filterable_attributes(json!(["_geo"])).await;
|
||||
index.update_settings_sortable_attributes(json!(["_geo"])).await;
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({
|
||||
"filter": "_geoBoundingBox([89, 179], [-89, -179])",
|
||||
}),
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": 1,
|
||||
"name": "Taco Truck",
|
||||
"address": "444 Salsa Street, Burritoville",
|
||||
"type": "Mexican",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": 34.0522,
|
||||
"lng": -118.2437
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"name": "La Bella Italia",
|
||||
"address": "456 Elm Street, Townsville",
|
||||
"type": "Italian",
|
||||
"rating": 9,
|
||||
"_geo": {
|
||||
"lat": "45.4777599",
|
||||
"lng": "9.1967508"
|
||||
}
|
||||
}
|
||||
],
|
||||
"query": "",
|
||||
"processingTimeMs": "[time]",
|
||||
"limit": 20,
|
||||
"offset": 0,
|
||||
"estimatedTotalHits": 2
|
||||
}
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
@@ -16,6 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
|
||||
);
|
||||
map.insert("stop_words", json!([]));
|
||||
map.insert("non_separator_tokens", json!([]));
|
||||
map.insert("separator_tokens", json!([]));
|
||||
map.insert("dictionary", json!([]));
|
||||
map.insert("synonyms", json!({}));
|
||||
map.insert(
|
||||
"faceting",
|
||||
@@ -51,7 +54,7 @@ async fn get_settings() {
|
||||
let (response, code) = index.settings().await;
|
||||
assert_eq!(code, 200);
|
||||
let settings = response.as_object().unwrap();
|
||||
assert_eq!(settings.keys().len(), 11);
|
||||
assert_eq!(settings.keys().len(), 14);
|
||||
assert_eq!(settings["displayedAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["searchableAttributes"], json!(["*"]));
|
||||
assert_eq!(settings["filterableAttributes"], json!([]));
|
||||
@@ -62,6 +65,9 @@ async fn get_settings() {
|
||||
json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
|
||||
);
|
||||
assert_eq!(settings["stopWords"], json!([]));
|
||||
assert_eq!(settings["nonSeparatorTokens"], json!([]));
|
||||
assert_eq!(settings["separatorTokens"], json!([]));
|
||||
assert_eq!(settings["dictionary"], json!([]));
|
||||
assert_eq!(
|
||||
settings["faceting"],
|
||||
json!({
|
||||
@@ -272,6 +278,9 @@ test_setting_routes!(
|
||||
searchable_attributes put,
|
||||
distinct_attribute put,
|
||||
stop_words put,
|
||||
separator_tokens put,
|
||||
non_separator_tokens put,
|
||||
dictionary put,
|
||||
ranking_rules put,
|
||||
synonyms put,
|
||||
pagination patch,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
mod distinct;
|
||||
mod errors;
|
||||
mod get_settings;
|
||||
mod tokenizer_customization;
|
||||
|
||||
467
meilisearch/tests/settings/tokenizer_customization.rs
Normal file
467
meilisearch/tests/settings/tokenizer_customization.rs
Normal file
@@ -0,0 +1,467 @@
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use serde_json::json;
|
||||
|
||||
use crate::common::Server;
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn set_and_reset() {
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": ["#", "&"],
|
||||
"separatorTokens": ["&sep", "<br/>"],
|
||||
"dictionary": ["J.R.R.", "J. R. R."],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
|
||||
[
|
||||
"#",
|
||||
"&"
|
||||
]
|
||||
"###);
|
||||
snapshot!(json_string!(response["separatorTokens"]), @r###"
|
||||
[
|
||||
"&sep",
|
||||
"<br/>"
|
||||
]
|
||||
"###);
|
||||
snapshot!(json_string!(response["dictionary"]), @r###"
|
||||
[
|
||||
"J. R. R.",
|
||||
"J.R.R."
|
||||
]
|
||||
"###);
|
||||
|
||||
index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": null,
|
||||
"separatorTokens": null,
|
||||
"dictionary": null,
|
||||
}))
|
||||
.await;
|
||||
|
||||
index.wait_task(1).await;
|
||||
|
||||
let (response, _) = index.settings().await;
|
||||
snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
|
||||
snapshot!(json_string!(response["separatorTokens"]), @"[]");
|
||||
snapshot!(json_string!(response["dictionary"]), @"[]");
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn set_and_search() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "G#D#G#D#G#C#D#G#C#",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
},
|
||||
]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"nonSeparatorTokens": ["#", "&"],
|
||||
"separatorTokens": ["<br/>", "&sep"],
|
||||
"dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "Mac <em>&</em> cheese"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "Mac&sep<em>&</em>&sepcheese"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(
|
||||
json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
|
||||
|response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "Mac & cheese",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "Mac&sep&&sepcheese",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"content": "G#D#G#D#G#C#D#G#C#",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @"[]");
|
||||
})
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn advanced_synergies() {
|
||||
let documents = json!([
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
},
|
||||
]);
|
||||
|
||||
let server = Server::new().await;
|
||||
let index = server.index("test");
|
||||
|
||||
index.add_documents(documents, None).await;
|
||||
index.wait_task(0).await;
|
||||
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"dictionary": ["J.R.R.", "J. R. R."],
|
||||
"synonyms": {
|
||||
"J.R.R.": ["jrr", "J. R. R."],
|
||||
"J. R. R.": ["jrr", "J.R.R."],
|
||||
"jrr": ["J.R.R.", "J. R. R."],
|
||||
"J.K.": ["jk", "J. K."],
|
||||
"J. K.": ["jk", "J.K."],
|
||||
"jk": ["J.K.", "J. K."],
|
||||
}
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(1).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R. R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"content": "J.R.R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "1",
|
||||
"content": "<em>J.R.R.</em> Tolkien"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"content": "jrr Tolkien",
|
||||
"_formatted": {
|
||||
"id": "3",
|
||||
"content": "<em>jrr</em> Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
// Only update dictionary, the synonyms should be recomputed.
|
||||
let (_response, _code) = index
|
||||
.update_settings(json!({
|
||||
"dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
|
||||
}))
|
||||
.await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
|
||||
index
|
||||
.search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
|
||||
snapshot!(code, @"200 OK");
|
||||
snapshot!(json_string!(response["hits"]), @r###"
|
||||
[
|
||||
{
|
||||
"id": 5,
|
||||
"content": "J. K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "5",
|
||||
"content": "<em>J. K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"content": "J.K. Rowlings",
|
||||
"_formatted": {
|
||||
"id": "4",
|
||||
"content": "<em>J.K.</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"content": "jk Rowlings",
|
||||
"_formatted": {
|
||||
"id": "6",
|
||||
"content": "<em>jk</em> Rowlings"
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"content": "J. R. R. Tolkien",
|
||||
"_formatted": {
|
||||
"id": "2",
|
||||
"content": "<em>J. R.</em> R. Tolkien"
|
||||
}
|
||||
}
|
||||
]
|
||||
"###);
|
||||
})
|
||||
.await;
|
||||
}
|
||||
@@ -1,6 +1,7 @@
|
||||
use std::time::Duration;
|
||||
|
||||
use actix_rt::time::sleep;
|
||||
use meili_snap::{json_string, snapshot};
|
||||
use meilisearch::option::ScheduleSnapshot;
|
||||
use meilisearch::Opt;
|
||||
|
||||
@@ -90,3 +91,97 @@ async fn perform_snapshot() {
|
||||
server.index("test1").settings(),
|
||||
);
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn perform_on_demand_snapshot() {
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
let snapshot_dir = tempfile::tempdir().unwrap();
|
||||
|
||||
let options =
|
||||
Opt { snapshot_dir: snapshot_dir.path().to_owned(), ..default_settings(temp.path()) };
|
||||
|
||||
let server = Server::new_with_options(options).await.unwrap();
|
||||
|
||||
let index = server.index("catto");
|
||||
index
|
||||
.update_settings(serde_json::json! ({
|
||||
"searchableAttributes": [],
|
||||
}))
|
||||
.await;
|
||||
|
||||
index.load_test_set().await;
|
||||
|
||||
server.index("doggo").create(Some("bone")).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
server.index("doggo").create(Some("bone")).await;
|
||||
index.wait_task(2).await;
|
||||
|
||||
let (task, code) = server.create_snapshot().await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
snapshot!(json_string!(task, { ".enqueuedAt" => "[date]" }), @r###"
|
||||
{
|
||||
"taskUid": 4,
|
||||
"indexUid": null,
|
||||
"status": "enqueued",
|
||||
"type": "snapshotCreation",
|
||||
"enqueuedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
let task = index.wait_task(3).await;
|
||||
snapshot!(json_string!(task, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
|
||||
{
|
||||
"uid": 3,
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "indexCreation",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"primaryKey": "bone"
|
||||
},
|
||||
"error": {
|
||||
"message": "Index `doggo` already exists.",
|
||||
"code": "index_already_exists",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#index_already_exists"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
let temp = tempfile::tempdir().unwrap();
|
||||
|
||||
let snapshot_path = snapshot_dir.path().to_owned().join("db.snapshot");
|
||||
#[cfg_attr(windows, allow(unused))]
|
||||
let snapshot_meta = std::fs::metadata(&snapshot_path).unwrap();
|
||||
|
||||
#[cfg(unix)]
|
||||
{
|
||||
use std::os::unix::fs::PermissionsExt;
|
||||
let mode = snapshot_meta.permissions().mode();
|
||||
// rwxrwxrwx
|
||||
meili_snap::snapshot!(format!("{:b}", mode), @"1000000100100100");
|
||||
}
|
||||
|
||||
let options = Opt { import_snapshot: Some(snapshot_path), ..default_settings(temp.path()) };
|
||||
|
||||
let snapshot_server = Server::new_with_options(options).await.unwrap();
|
||||
|
||||
verify_snapshot!(server, snapshot_server, |server| =>
|
||||
server.list_indexes(None, None),
|
||||
// for some reason the db sizes differ. this may be due to the compaction options we have
|
||||
// set when performing the snapshot
|
||||
//server.stats(),
|
||||
|
||||
// The original instance contains the snapshotCreation task, while the snapshotted-instance does not. For this reason we need to compare the task queue **after** the task 4
|
||||
server.tasks_filter("?from=2"),
|
||||
|
||||
server.index("test").get_all_documents(GetAllDocumentsOptions::default()),
|
||||
server.index("test").settings(),
|
||||
server.index("test1").get_all_documents(GetAllDocumentsOptions::default()),
|
||||
server.index("test1").settings(),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -17,10 +17,10 @@ bincode = "1.3.3"
|
||||
bstr = "1.4.0"
|
||||
bytemuck = { version = "1.13.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.4.3"
|
||||
charabia = { version = "0.8.2", default-features = false }
|
||||
charabia = { version = "0.8.3", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.8"
|
||||
deserr = "0.5.0"
|
||||
deserr = { version = "0.6.0", features = ["actix-web"]}
|
||||
either = { version = "1.8.1", features = ["serde"] }
|
||||
flatten-serde-json = { path = "../flatten-serde-json" }
|
||||
fst = "0.4.7"
|
||||
@@ -32,18 +32,18 @@ grenad = { version = "0.4.4", default-features = false, features = [
|
||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
|
||||
"lmdb", "read-txn-no-tls"
|
||||
] }
|
||||
indexmap = { version = "1.9.3", features = ["serde"] }
|
||||
indexmap = { version = "2.0.0", features = ["serde"] }
|
||||
instant-distance = { version = "0.6.1", features = ["with-serde"] }
|
||||
json-depth-checker = { path = "../json-depth-checker" }
|
||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||
memmap2 = "0.5.10"
|
||||
memmap2 = "0.7.1"
|
||||
obkv = "0.2.0"
|
||||
once_cell = "1.17.1"
|
||||
ordered-float = "3.6.0"
|
||||
rand_pcg = { version = "0.3.1", features = ["serde1"] }
|
||||
rayon = "1.7.0"
|
||||
roaring = "0.10.1"
|
||||
rstar = { version = "0.10.0", features = ["serde"] }
|
||||
rstar = { version = "0.11.0", features = ["serde"] }
|
||||
serde = { version = "1.0.160", features = ["derive"] }
|
||||
serde_json = { version = "1.0.95", features = ["preserve_order"] }
|
||||
slice-group-by = "0.3.0"
|
||||
@@ -63,7 +63,7 @@ uuid = { version = "1.3.1", features = ["v4"] }
|
||||
filter-parser = { path = "../filter-parser" }
|
||||
|
||||
# documents words self-join
|
||||
itertools = "0.10.5"
|
||||
itertools = "0.11.0"
|
||||
|
||||
# profiling
|
||||
puffin = "0.16.0"
|
||||
|
||||
@@ -122,22 +122,28 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
true => "This index does not have configured sortable attributes.".to_string(),
|
||||
false => format!("Available sortable attributes are: `{}`.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
false => format!("Available sortable attributes are: `{}{}`.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||
),
|
||||
}
|
||||
)]
|
||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
|
||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
|
||||
#[error("Attribute `{}` is not facet-searchable. {}",
|
||||
.field,
|
||||
match .valid_fields.is_empty() {
|
||||
true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
|
||||
false => format!("Available facet-searchable attributes are: `{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
|
||||
false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
|
||||
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
|
||||
),
|
||||
}
|
||||
)]
|
||||
InvalidFacetSearchFacetName { field: String, valid_fields: BTreeSet<String> },
|
||||
InvalidFacetSearchFacetName {
|
||||
field: String,
|
||||
valid_fields: BTreeSet<String>,
|
||||
hidden_fields: bool,
|
||||
},
|
||||
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
|
||||
.field,
|
||||
.valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
|
||||
@@ -340,8 +346,11 @@ fn conditionally_lookup_for_error_message() {
|
||||
];
|
||||
|
||||
for (list, suffix) in messages {
|
||||
let err =
|
||||
UserError::InvalidSortableAttribute { field: "name".to_string(), valid_fields: list };
|
||||
let err = UserError::InvalidSortableAttribute {
|
||||
field: "name".to_string(),
|
||||
valid_fields: list,
|
||||
hidden_fields: false,
|
||||
};
|
||||
|
||||
assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::fs::File;
|
||||
use std::mem::size_of;
|
||||
use std::path::Path;
|
||||
@@ -61,8 +61,12 @@ pub mod main_key {
|
||||
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
|
||||
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
|
||||
pub const STOP_WORDS_KEY: &str = "stop-words";
|
||||
pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
|
||||
pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
|
||||
pub const DICTIONARY_KEY: &str = "dictionary";
|
||||
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
|
||||
pub const SYNONYMS_KEY: &str = "synonyms";
|
||||
pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
|
||||
pub const WORDS_FST_KEY: &str = "words-fst";
|
||||
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
||||
pub const CREATED_AT_KEY: &str = "created-at";
|
||||
@@ -651,6 +655,26 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* remove hidden fields */
|
||||
pub fn remove_hidden_fields(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
fields: impl IntoIterator<Item = impl AsRef<str>>,
|
||||
) -> Result<(BTreeSet<String>, bool)> {
|
||||
let mut valid_fields =
|
||||
fields.into_iter().map(|f| f.as_ref().to_string()).collect::<BTreeSet<String>>();
|
||||
|
||||
let fields_len = valid_fields.len();
|
||||
|
||||
if let Some(dn) = self.displayed_fields(rtxn)? {
|
||||
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &displayable_names;
|
||||
}
|
||||
|
||||
let hidden_fields = fields_len > valid_fields.len();
|
||||
Ok((valid_fields, hidden_fields))
|
||||
}
|
||||
|
||||
/* searchable fields */
|
||||
|
||||
/// Write the user defined searchable fields and generate the real searchable fields from the specified fields ids map.
|
||||
@@ -1055,18 +1079,116 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
/* non separator tokens */
|
||||
|
||||
pub(crate) fn put_non_separator_tokens(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
|
||||
}
|
||||
|
||||
pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
|
||||
rtxn,
|
||||
main_key::NON_SEPARATOR_TOKENS_KEY,
|
||||
)?)
|
||||
}
|
||||
|
||||
/* separator tokens */
|
||||
|
||||
pub(crate) fn put_separator_tokens(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
|
||||
}
|
||||
|
||||
pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
|
||||
}
|
||||
|
||||
/* separators easing method */
|
||||
|
||||
pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
let default_separators =
|
||||
charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
|
||||
let mut separators: Option<BTreeSet<_>> = None;
|
||||
if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
|
||||
separator_tokens.extend(default_separators.clone());
|
||||
separators = Some(separator_tokens);
|
||||
}
|
||||
|
||||
if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
|
||||
separators = separators
|
||||
.or_else(|| Some(default_separators.collect()))
|
||||
.map(|separators| &separators - &non_separator_tokens);
|
||||
}
|
||||
|
||||
Ok(separators)
|
||||
}
|
||||
|
||||
/* dictionary */
|
||||
|
||||
pub(crate) fn put_dictionary(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
set: &BTreeSet<String>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
|
||||
}
|
||||
|
||||
pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
|
||||
}
|
||||
|
||||
/* synonyms */
|
||||
|
||||
pub(crate) fn put_synonyms(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
|
||||
user_defined_synonyms: &BTreeMap<String, Vec<String>>,
|
||||
) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
|
||||
self.main.put::<_, Str, SerdeBincode<_>>(
|
||||
wtxn,
|
||||
main_key::USER_DEFINED_SYNONYMS_KEY,
|
||||
user_defined_synonyms,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)
|
||||
self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
|
||||
self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)
|
||||
}
|
||||
|
||||
pub fn user_defined_synonyms(
|
||||
&self,
|
||||
rtxn: &RoTxn,
|
||||
) -> heed::Result<BTreeMap<String, Vec<String>>> {
|
||||
Ok(self
|
||||
.main
|
||||
.get::<_, Str, SerdeBincode<_>>(rtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?
|
||||
.unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
|
||||
@@ -1718,11 +1840,11 @@ pub(crate) mod tests {
|
||||
.unwrap();
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 0, "_geo": { "lat": 0, "lng": 0 } },
|
||||
{ "id": 1, "_geo": { "lat": 0, "lng": -175 } },
|
||||
{ "id": 2, "_geo": { "lat": 0, "lng": 175 } },
|
||||
{ "id": 0, "_geo": { "lat": "0", "lng": "0" } },
|
||||
{ "id": 1, "_geo": { "lat": 0, "lng": "-175" } },
|
||||
{ "id": 2, "_geo": { "lat": "0", "lng": 175 } },
|
||||
{ "id": 3, "_geo": { "lat": 85, "lng": 0 } },
|
||||
{ "id": 4, "_geo": { "lat": -85, "lng": 0 } },
|
||||
{ "id": 4, "_geo": { "lat": "-85", "lng": "0" } },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ const MAX_LMDB_KEY_LENGTH: usize = 500;
|
||||
///
|
||||
/// This number is determined by the keys of the different facet databases
|
||||
/// and adding a margin of safety.
|
||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 20;
|
||||
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
|
||||
|
||||
/// The maximum length a word can be
|
||||
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
|
||||
@@ -293,15 +293,15 @@ pub fn normalize_facet(original: &str) -> String {
|
||||
#[derive(serde::Serialize, serde::Deserialize, Debug)]
|
||||
#[serde(transparent)]
|
||||
pub struct VectorOrArrayOfVectors {
|
||||
#[serde(with = "either::serde_untagged")]
|
||||
inner: either::Either<Vec<f32>, Vec<Vec<f32>>>,
|
||||
#[serde(with = "either::serde_untagged_optional")]
|
||||
inner: Option<either::Either<Vec<f32>, Vec<Vec<f32>>>>,
|
||||
}
|
||||
|
||||
impl VectorOrArrayOfVectors {
|
||||
pub fn into_array_of_vectors(self) -> Vec<Vec<f32>> {
|
||||
match self.inner {
|
||||
either::Either::Left(vector) => vec![vector],
|
||||
either::Either::Right(vectors) => vectors,
|
||||
pub fn into_array_of_vectors(self) -> Option<Vec<Vec<f32>>> {
|
||||
match self.inner? {
|
||||
either::Either::Left(vector) => Some(vec![vector]),
|
||||
either::Either::Right(vectors) => Some(vectors),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -280,9 +280,13 @@ impl<'a> SearchForFacetValues<'a> {
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, filterable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
valid_fields: filterable_fields.into_iter().collect(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
@@ -91,11 +91,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
||||
/// Update the universes accordingly and inform the logger.
|
||||
macro_rules! back {
|
||||
() => {
|
||||
assert!(
|
||||
ranking_rule_universes[cur_ranking_rule_index].is_empty(),
|
||||
"The ranking rule {} did not sort its bucket exhaustively",
|
||||
ranking_rules[cur_ranking_rule_index].id()
|
||||
);
|
||||
// FIXME: temporarily disabled assert: see <https://github.com/meilisearch/meilisearch/pull/4013>
|
||||
// assert!(
|
||||
// ranking_rule_universes[cur_ranking_rule_index].is_empty(),
|
||||
// "The ranking rule {} did not sort its bucket exhaustively",
|
||||
// ranking_rules[cur_ranking_rule_index].id()
|
||||
// );
|
||||
logger.end_iteration_ranking_rule(
|
||||
cur_ranking_rule_index,
|
||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||
|
||||
@@ -20,7 +20,7 @@ mod sort;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use std::collections::{BTreeSet, HashSet};
|
||||
use std::collections::HashSet;
|
||||
|
||||
use bucket_sort::{bucket_sort, BucketSortOutput};
|
||||
use charabia::TokenizerBuilder;
|
||||
@@ -108,24 +108,11 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
(None, None) => continue,
|
||||
// The field is not searchable => User error
|
||||
(_fid, Some(false)) => {
|
||||
let mut valid_fields: BTreeSet<_> =
|
||||
fids_map.names().map(String::from).collect();
|
||||
let (valid_fields, hidden_fields) = match searchable_names {
|
||||
Some(sn) => self.index.remove_hidden_fields(self.txn, sn)?,
|
||||
None => self.index.remove_hidden_fields(self.txn, fids_map.names())?,
|
||||
};
|
||||
|
||||
// Filter by the searchable names
|
||||
if let Some(sn) = searchable_names {
|
||||
let searchable_names = sn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &searchable_names;
|
||||
}
|
||||
|
||||
let searchable_count = valid_fields.len();
|
||||
|
||||
// Remove hidden fields
|
||||
if let Some(dn) = self.index.displayed_fields(self.txn)? {
|
||||
let displayable_names = dn.iter().map(|s| s.to_string()).collect();
|
||||
valid_fields = &valid_fields & &displayable_names;
|
||||
}
|
||||
|
||||
let hidden_fields = searchable_count > valid_fields.len();
|
||||
let field = field_name.to_string();
|
||||
return Err(UserError::InvalidSearchableAttribute {
|
||||
field,
|
||||
@@ -488,6 +475,20 @@ pub fn execute_search(
|
||||
tokbuilder.stop_words(stop_words);
|
||||
}
|
||||
|
||||
let separators = ctx.index.allowed_separators(ctx.txn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref separators) = separators {
|
||||
tokbuilder.separators(separators);
|
||||
}
|
||||
|
||||
let dictionary = ctx.index.dictionary(ctx.txn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref dictionary) = dictionary {
|
||||
tokbuilder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
let script_lang_map = ctx.index.script_language(ctx.txn)?;
|
||||
if !script_lang_map.is_empty() {
|
||||
tokbuilder.allow_list(&script_lang_map);
|
||||
@@ -590,16 +591,24 @@ fn check_sort_criteria(ctx: &SearchContext, sort_criteria: Option<&Vec<AscDesc>>
|
||||
for asc_desc in sort_criteria {
|
||||
match asc_desc.member() {
|
||||
Member::Field(ref field) if !crate::is_faceted(field, &sortable_fields) => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: field.to_string(),
|
||||
valid_fields: sortable_fields.into_iter().collect(),
|
||||
})?
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
})?;
|
||||
}
|
||||
Member::Geo(_) if !sortable_fields.contains("_geo") => {
|
||||
let (valid_fields, hidden_fields) =
|
||||
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidSortableAttribute {
|
||||
field: "_geo".to_string(),
|
||||
valid_fields: sortable_fields.into_iter().collect(),
|
||||
})?
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
})?;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
@@ -2,7 +2,7 @@ use std::io::Cursor;
|
||||
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::{hashmap, hashset};
|
||||
use maplit::{btreemap, hashset};
|
||||
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
@@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
});
|
||||
builder.set_synonyms(hashmap! {
|
||||
builder.set_synonyms(btreemap! {
|
||||
S("hello") => vec![S("good morning")],
|
||||
S("world") => vec![S("earth")],
|
||||
S("america") => vec![S("the united states")],
|
||||
|
||||
@@ -15,7 +15,7 @@ they store fewer sprximities than the regular word sprximity DB.
|
||||
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::tests::collect_field_values;
|
||||
@@ -336,7 +336,7 @@ fn test_proximity_split_word() {
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
let mut syns = HashMap::new();
|
||||
let mut syns = BTreeMap::new();
|
||||
syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
|
||||
s.set_synonyms(syns);
|
||||
})
|
||||
|
||||
@@ -18,7 +18,7 @@ if `words` doesn't exist before it.
|
||||
14. Synonyms cost nothing according to the typo ranking rule
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::tests::collect_field_values;
|
||||
@@ -591,7 +591,7 @@ fn test_typo_synonyms() {
|
||||
.update_settings(|s| {
|
||||
s.set_criteria(vec![Criterion::Typo]);
|
||||
|
||||
let mut synonyms = HashMap::new();
|
||||
let mut synonyms = BTreeMap::new();
|
||||
synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
|
||||
synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);
|
||||
|
||||
|
||||
@@ -94,7 +94,7 @@ use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValu
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::update::index_documents::create_sorter;
|
||||
use crate::update::merge_btreeset_string;
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16};
|
||||
use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};
|
||||
|
||||
pub mod bulk;
|
||||
pub mod delete;
|
||||
@@ -191,7 +191,16 @@ impl<'i> FacetsUpdate<'i> {
|
||||
for result in database.iter(wtxn)? {
|
||||
let (facet_group_key, ()) = result?;
|
||||
if let FacetGroupKey { field_id, level: 0, left_bound } = facet_group_key {
|
||||
let normalized_facet = left_bound.normalize(&options);
|
||||
let mut normalized_facet = left_bound.normalize(&options);
|
||||
let normalized_truncated_facet: String;
|
||||
if normalized_facet.len() > MAX_FACET_VALUE_LENGTH {
|
||||
normalized_truncated_facet = normalized_facet
|
||||
.char_indices()
|
||||
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||
.map(|(_, c)| c)
|
||||
.collect();
|
||||
normalized_facet = normalized_truncated_facet.into();
|
||||
}
|
||||
let set = BTreeSet::from_iter(std::iter::once(left_bound));
|
||||
let key = (field_id, normalized_facet.as_ref());
|
||||
let key = BEU16StrCodec::bytes_encode(&key).ok_or(heed::Error::Encoding)?;
|
||||
|
||||
@@ -28,6 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
indexer: GrenadParameters,
|
||||
searchable_fields: &Option<HashSet<FieldId>>,
|
||||
stop_words: Option<&fst::Set<&[u8]>>,
|
||||
allowed_separators: Option<&[&str]>,
|
||||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
|
||||
puffin::profile_function!();
|
||||
@@ -52,6 +54,12 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
||||
if let Some(stop_words) = stop_words {
|
||||
tokenizer_builder.stop_words(stop_words);
|
||||
}
|
||||
if let Some(dictionary) = dictionary {
|
||||
tokenizer_builder.words_dict(dictionary);
|
||||
}
|
||||
if let Some(separators) = allowed_separators {
|
||||
tokenizer_builder.separators(separators);
|
||||
}
|
||||
let tokenizer = tokenizer_builder.build();
|
||||
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
|
||||
@@ -46,7 +46,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
||||
if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
|
||||
normalised_truncated_value = normalised_value
|
||||
.char_indices()
|
||||
.take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
|
||||
.take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
|
||||
.map(|(_, c)| c)
|
||||
.collect();
|
||||
normalised_value = normalised_truncated_value.as_str();
|
||||
|
||||
@@ -28,11 +28,13 @@ pub struct ExtractedFacetValues {
|
||||
///
|
||||
/// Returns the generated grenad reader containing the docid the fid and the orginal value as key
|
||||
/// and the normalized value as value extracted from the given chunk of documents.
|
||||
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
|
||||
#[logging_timer::time]
|
||||
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
obkv_documents: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
faceted_fields: &HashSet<FieldId>,
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
) -> Result<ExtractedFacetValues> {
|
||||
puffin::profile_function!();
|
||||
|
||||
@@ -84,7 +86,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
||||
|
||||
let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
|
||||
|
||||
match extract_facet_values(&value) {
|
||||
match extract_facet_values(
|
||||
&value,
|
||||
geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
|
||||
) {
|
||||
FilterableValues::Null => {
|
||||
facet_is_null_docids.entry(field_id).or_default().insert(document);
|
||||
}
|
||||
@@ -177,12 +182,13 @@ enum FilterableValues {
|
||||
Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
|
||||
}
|
||||
|
||||
fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
|
||||
fn inner_extract_facet_values(
|
||||
value: &Value,
|
||||
can_recurse: bool,
|
||||
output_numbers: &mut Vec<f64>,
|
||||
output_strings: &mut Vec<(String, String)>,
|
||||
geo_field: bool,
|
||||
) {
|
||||
match value {
|
||||
Value::Null => (),
|
||||
@@ -193,13 +199,30 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
}
|
||||
}
|
||||
Value::String(original) => {
|
||||
// if we're working on a geofield it MUST be something we can parse or else there was an internal error
|
||||
// in the enrich pipeline. But since the enrich pipeline worked, we want to avoid crashing at all costs.
|
||||
if geo_field {
|
||||
if let Ok(float) = original.parse() {
|
||||
output_numbers.push(float);
|
||||
} else {
|
||||
log::warn!(
|
||||
"Internal error, could not parse a geofield that has been validated. Please open an issue."
|
||||
)
|
||||
}
|
||||
}
|
||||
let normalized = crate::normalize_facet(original);
|
||||
output_strings.push((normalized, original.clone()));
|
||||
}
|
||||
Value::Array(values) => {
|
||||
if can_recurse {
|
||||
for value in values {
|
||||
inner_extract_facet_values(value, false, output_numbers, output_strings);
|
||||
inner_extract_facet_values(
|
||||
value,
|
||||
false,
|
||||
output_numbers,
|
||||
output_strings,
|
||||
geo_field,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -215,7 +238,7 @@ fn extract_facet_values(value: &Value) -> FilterableValues {
|
||||
otherwise => {
|
||||
let mut numbers = Vec::new();
|
||||
let mut strings = Vec::new();
|
||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings);
|
||||
inner_extract_facet_values(otherwise, true, &mut numbers, &mut strings, geo_field);
|
||||
FilterableValues::Values { numbers, strings }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
// lazily get it when needed
|
||||
let document_id = || -> Value {
|
||||
let document_id = obkv.get(primary_key_id).unwrap();
|
||||
serde_json::from_slice(document_id).unwrap()
|
||||
from_slice(document_id).unwrap()
|
||||
};
|
||||
|
||||
// first we retrieve the _vectors field
|
||||
@@ -52,12 +52,14 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
}
|
||||
};
|
||||
|
||||
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
let index = u16::try_from(i).unwrap();
|
||||
let mut key = docid_bytes.to_vec();
|
||||
key.extend_from_slice(&index.to_be_bytes());
|
||||
let bytes = cast_slice(&vector);
|
||||
writer.insert(key, bytes)?;
|
||||
if let Some(vectors) = vectors {
|
||||
for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
|
||||
let index = u16::try_from(i).unwrap();
|
||||
let mut key = docid_bytes.to_vec();
|
||||
key.extend_from_slice(&index.to_be_bytes());
|
||||
let bytes = cast_slice(&vector);
|
||||
writer.insert(key, bytes)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
// else => the `_vectors` object was `null`, there is nothing to do
|
||||
|
||||
@@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: Option<fst::Set<&[u8]>>,
|
||||
allowed_separators: Option<&[&str]>,
|
||||
dictionary: Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
exact_attributes: HashSet<FieldId>,
|
||||
) -> Result<()> {
|
||||
@@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents(
|
||||
geo_fields_ids,
|
||||
vectors_field_id,
|
||||
&stop_words,
|
||||
&allowed_separators,
|
||||
&dictionary,
|
||||
max_positions_per_attributes,
|
||||
)
|
||||
})
|
||||
@@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data(
|
||||
geo_fields_ids: Option<(FieldId, FieldId)>,
|
||||
vectors_field_id: Option<FieldId>,
|
||||
stop_words: &Option<fst::Set<&[u8]>>,
|
||||
allowed_separators: &Option<&[&str]>,
|
||||
dictionary: &Option<&[&str]>,
|
||||
max_positions_per_attributes: Option<u32>,
|
||||
) -> Result<(
|
||||
grenad::Reader<CursorClonableMmap>,
|
||||
@@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data(
|
||||
indexer,
|
||||
searchable_fields,
|
||||
stop_words.as_ref(),
|
||||
*allowed_separators,
|
||||
*dictionary,
|
||||
max_positions_per_attributes,
|
||||
)?;
|
||||
|
||||
@@ -370,6 +378,7 @@ fn send_and_extract_flattened_documents_data(
|
||||
flattened_documents_chunk.clone(),
|
||||
indexer,
|
||||
faceted_fields,
|
||||
geo_fields_ids,
|
||||
)?;
|
||||
|
||||
// send docid_fid_facet_numbers_chunk to DB writer
|
||||
|
||||
@@ -316,6 +316,12 @@ where
|
||||
let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors");
|
||||
|
||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||
let separators = self.index.allowed_separators(self.wtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let dictionary = self.index.dictionary(self.wtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
||||
|
||||
let pool_params = GrenadParameters {
|
||||
@@ -353,6 +359,8 @@ where
|
||||
geo_fields_ids,
|
||||
vectors_field_id,
|
||||
stop_words,
|
||||
separators.as_deref(),
|
||||
dictionary.as_deref(),
|
||||
max_positions_per_attributes,
|
||||
exact_attributes,
|
||||
)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||
use std::result::Result as StdResult;
|
||||
|
||||
use charabia::{Normalize, Tokenizer, TokenizerBuilder};
|
||||
@@ -112,8 +112,11 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<Criterion>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
non_separator_tokens: Setting<BTreeSet<String>>,
|
||||
separator_tokens: Setting<BTreeSet<String>>,
|
||||
dictionary: Setting<BTreeSet<String>>,
|
||||
distinct_field: Setting<String>,
|
||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||
synonyms: Setting<BTreeMap<String, Vec<String>>>,
|
||||
primary_key: Setting<String>,
|
||||
authorize_typos: Setting<bool>,
|
||||
min_word_len_two_typos: Setting<u8>,
|
||||
@@ -141,6 +144,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
sortable_fields: Setting::NotSet,
|
||||
criteria: Setting::NotSet,
|
||||
stop_words: Setting::NotSet,
|
||||
non_separator_tokens: Setting::NotSet,
|
||||
separator_tokens: Setting::NotSet,
|
||||
dictionary: Setting::NotSet,
|
||||
distinct_field: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
primary_key: Setting::NotSet,
|
||||
@@ -205,6 +211,39 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
|
||||
}
|
||||
|
||||
pub fn reset_non_separator_tokens(&mut self) {
|
||||
self.non_separator_tokens = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
|
||||
self.non_separator_tokens = if non_separator_tokens.is_empty() {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(non_separator_tokens)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset_separator_tokens(&mut self) {
|
||||
self.separator_tokens = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
|
||||
self.separator_tokens = if separator_tokens.is_empty() {
|
||||
Setting::Reset
|
||||
} else {
|
||||
Setting::Set(separator_tokens)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset_dictionary(&mut self) {
|
||||
self.dictionary = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
|
||||
self.dictionary =
|
||||
if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
|
||||
}
|
||||
|
||||
pub fn reset_distinct_field(&mut self) {
|
||||
self.distinct_field = Setting::Reset;
|
||||
}
|
||||
@@ -217,7 +256,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.synonyms = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
|
||||
pub fn set_synonyms(&mut self, synonyms: BTreeMap<String, Vec<String>>) {
|
||||
self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) }
|
||||
}
|
||||
|
||||
@@ -452,9 +491,84 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
fn update_non_separator_tokens(&mut self) -> Result<bool> {
|
||||
let changes = match self.non_separator_tokens {
|
||||
Setting::Set(ref non_separator_tokens) => {
|
||||
let current = self.index.non_separator_tokens(self.wtxn)?;
|
||||
|
||||
// Does the new list differ from the previous one?
|
||||
if current.map_or(true, |current| ¤t != non_separator_tokens) {
|
||||
self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
|
||||
Setting::NotSet => false,
|
||||
};
|
||||
|
||||
// the synonyms must be updated if non separator tokens have been updated.
|
||||
if changes && self.synonyms == Setting::NotSet {
|
||||
self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
|
||||
}
|
||||
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
fn update_separator_tokens(&mut self) -> Result<bool> {
|
||||
let changes = match self.separator_tokens {
|
||||
Setting::Set(ref separator_tokens) => {
|
||||
let current = self.index.separator_tokens(self.wtxn)?;
|
||||
|
||||
// Does the new list differ from the previous one?
|
||||
if current.map_or(true, |current| ¤t != separator_tokens) {
|
||||
self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
|
||||
Setting::NotSet => false,
|
||||
};
|
||||
|
||||
// the synonyms must be updated if separator tokens have been updated.
|
||||
if changes && self.synonyms == Setting::NotSet {
|
||||
self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
|
||||
}
|
||||
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
fn update_dictionary(&mut self) -> Result<bool> {
|
||||
let changes = match self.dictionary {
|
||||
Setting::Set(ref dictionary) => {
|
||||
let current = self.index.dictionary(self.wtxn)?;
|
||||
|
||||
// Does the new list differ from the previous one?
|
||||
if current.map_or(true, |current| ¤t != dictionary) {
|
||||
self.index.put_dictionary(self.wtxn, dictionary)?;
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
|
||||
Setting::NotSet => false,
|
||||
};
|
||||
|
||||
// the synonyms must be updated if dictionary has been updated.
|
||||
if changes && self.synonyms == Setting::NotSet {
|
||||
self.synonyms = Setting::Set(self.index.user_defined_synonyms(self.wtxn)?);
|
||||
}
|
||||
|
||||
Ok(changes)
|
||||
}
|
||||
|
||||
fn update_synonyms(&mut self) -> Result<bool> {
|
||||
match self.synonyms {
|
||||
Setting::Set(ref synonyms) => {
|
||||
Setting::Set(ref user_synonyms) => {
|
||||
fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec<String> {
|
||||
tokenizer
|
||||
.tokenize(text)
|
||||
@@ -473,10 +587,25 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
if let Some(ref stop_words) = stop_words {
|
||||
builder.stop_words(stop_words);
|
||||
}
|
||||
|
||||
let separators = self.index.allowed_separators(self.wtxn)?;
|
||||
let separators: Option<Vec<_>> =
|
||||
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref separators) = separators {
|
||||
builder.separators(separators);
|
||||
}
|
||||
|
||||
let dictionary = self.index.dictionary(self.wtxn)?;
|
||||
let dictionary: Option<Vec<_>> =
|
||||
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
|
||||
if let Some(ref dictionary) = dictionary {
|
||||
builder.words_dict(dictionary);
|
||||
}
|
||||
|
||||
let tokenizer = builder.build();
|
||||
|
||||
let mut new_synonyms = HashMap::new();
|
||||
for (word, synonyms) in synonyms {
|
||||
for (word, synonyms) in user_synonyms {
|
||||
// Normalize both the word and associated synonyms.
|
||||
let normalized_word = normalize(&tokenizer, word);
|
||||
let normalized_synonyms =
|
||||
@@ -497,7 +626,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
let old_synonyms = self.index.synonyms(self.wtxn)?;
|
||||
|
||||
if new_synonyms != old_synonyms {
|
||||
self.index.put_synonyms(self.wtxn, &new_synonyms)?;
|
||||
self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
|
||||
Ok(true)
|
||||
} else {
|
||||
Ok(false)
|
||||
@@ -757,11 +886,17 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
let faceted_updated = old_faceted_fields != new_faceted_fields;
|
||||
|
||||
let stop_words_updated = self.update_stop_words()?;
|
||||
let non_separator_tokens_updated = self.update_non_separator_tokens()?;
|
||||
let separator_tokens_updated = self.update_separator_tokens()?;
|
||||
let dictionary_updated = self.update_dictionary()?;
|
||||
let synonyms_updated = self.update_synonyms()?;
|
||||
let searchable_updated = self.update_searchable()?;
|
||||
let exact_attributes_updated = self.update_exact_attributes()?;
|
||||
|
||||
if stop_words_updated
|
||||
|| non_separator_tokens_updated
|
||||
|| separator_tokens_updated
|
||||
|| dictionary_updated
|
||||
|| faceted_updated
|
||||
|| synonyms_updated
|
||||
|| searchable_updated
|
||||
@@ -778,7 +913,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
mod tests {
|
||||
use big_s::S;
|
||||
use heed::types::ByteSlice;
|
||||
use maplit::{btreeset, hashmap, hashset};
|
||||
use maplit::{btreemap, btreeset, hashset};
|
||||
|
||||
use super::*;
|
||||
use crate::error::Error;
|
||||
@@ -1244,7 +1379,7 @@ mod tests {
|
||||
// In the same transaction provide some synonyms
|
||||
index
|
||||
.update_settings_using_wtxn(&mut wtxn, |settings| {
|
||||
settings.set_synonyms(hashmap! {
|
||||
settings.set_synonyms(btreemap! {
|
||||
"blini".to_string() => vec!["crepes".to_string()],
|
||||
"super like".to_string() => vec!["love".to_string()],
|
||||
"puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
|
||||
@@ -1540,6 +1675,9 @@ mod tests {
|
||||
sortable_fields,
|
||||
criteria,
|
||||
stop_words,
|
||||
non_separator_tokens,
|
||||
separator_tokens,
|
||||
dictionary,
|
||||
distinct_field,
|
||||
synonyms,
|
||||
primary_key,
|
||||
@@ -1558,6 +1696,9 @@ mod tests {
|
||||
assert!(matches!(sortable_fields, Setting::NotSet));
|
||||
assert!(matches!(criteria, Setting::NotSet));
|
||||
assert!(matches!(stop_words, Setting::NotSet));
|
||||
assert!(matches!(non_separator_tokens, Setting::NotSet));
|
||||
assert!(matches!(separator_tokens, Setting::NotSet));
|
||||
assert!(matches!(dictionary, Setting::NotSet));
|
||||
assert!(matches!(distinct_field, Setting::NotSet));
|
||||
assert!(matches!(synonyms, Setting::NotSet));
|
||||
assert!(matches!(primary_key, Setting::NotSet));
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::io::Cursor;
|
||||
use big_s::S;
|
||||
use either::{Either, Left, Right};
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::{hashmap, hashset};
|
||||
use maplit::{btreemap, hashset};
|
||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
|
||||
@@ -51,7 +51,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
||||
S("tag"),
|
||||
S("asc_desc_rank"),
|
||||
});
|
||||
builder.set_synonyms(hashmap! {
|
||||
builder.set_synonyms(btreemap! {
|
||||
S("hello") => vec![S("good morning")],
|
||||
S("world") => vec![S("earth")],
|
||||
S("america") => vec![S("the united states")],
|
||||
|
||||
Reference in New Issue
Block a user