Compare commits

..

15 Commits

SHA1 Message Date
6d8c88b129 Analytics: change event name 2023-02-21 09:54:14 +01:00
7510e3d684 rename search to multi_search in auth tests 2023-02-21 09:45:39 +01:00
797da246a4 rename search to multi_search in tests 2023-02-21 09:45:17 +01:00
e19bd82202 Rename search to multi_search in test server 2023-02-21 09:44:50 +01:00
9edda9a1e8 Rename search to multi-search 2023-02-21 09:44:23 +01:00
2bffc0b32a multi-search/authentication: Add authentication tests 2023-02-20 13:52:12 +01:00
34ac3c0535 multi-search: Add multi search tests 2023-02-20 13:52:12 +01:00
9ce65b3aaa multi-search: Add test server search method for multi search 2023-02-20 13:52:12 +01:00
809847f138 multi-search: Add search with an array of indexes 2023-02-20 13:52:11 +01:00
e83bc57b18 multi-search: Add basic analytics 2023-02-20 13:52:11 +01:00
b83b67fe12 Authentication: Make allow_index_creation a private field 2023-02-20 13:52:11 +01:00
421666a64d Authentication: Make search_rules optional in AuthFilter 2023-02-20 13:52:11 +01:00
181a86305e Authentication: Directly pass the authfilter to the index scheduler 2023-02-20 13:52:11 +01:00
d1e66f687e Authentication: is_index_authorized takes into account API key indexes even with a tenant token 2023-02-20 13:52:10 +01:00
2b9cea271b Authentication: Refactor authentication check to work for tenant token even without an index in URL
Callers need to manually check `is_index_authorized` when using the route without an index in URL
2023-02-20 13:52:10 +01:00
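The note on commit 2b9cea271b changes the contract for routes that have no index in the URL, such as the new multi-search route: the authentication layer can no longer verify index access up front, so each handler must check every index the request touches. A minimal sketch of that pattern, using illustrative stand-in types rather than the real meilisearch-auth API:

```rust
// Illustrative stand-ins only; the real `AuthFilter` and
// `is_index_authorized` live in the meilisearch-auth crate.
struct AuthFilter {
    allowed_indexes: Vec<String>,
}

impl AuthFilter {
    fn is_index_authorized(&self, index_uid: &str) -> bool {
        self.allowed_indexes.iter().any(|uid| uid == index_uid || uid == "*")
    }
}

// A route like multi-search carries its target indexes in the request body,
// not the URL, so the handler has to authorize each one itself.
fn check_multi_search_auth(auth: &AuthFilter, index_uids: &[&str]) -> Result<(), String> {
    for uid in index_uids {
        if !auth.is_index_authorized(uid) {
            return Err(format!("index `{uid}` is not authorized"));
        }
    }
    Ok(())
}
```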
54 changed files with 2414 additions and 2383 deletions

Cargo.lock generated

@@ -1113,9 +1113,9 @@ dependencies = [
 [[package]]
 name = "deserr"
-version = "0.5.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c71c14985c842bf1e520b1ebcd22daff6aeece32f510e11f063cecf9b308c04b"
+checksum = "6eee2844f21cf7fb5693aae1fb8f1658127acfdb2fc072167d68a9152584ae64"
 dependencies = [
  "actix-http",
  "actix-utils",
@@ -1130,9 +1130,9 @@ dependencies = [
 [[package]]
 name = "deserr-internal"
-version = "0.5.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cae1c51b191528c9e4e5d6cff671de94f61fcda1c206cc891251e0cf438c941a"
+checksum = "c27246f8ca9eeba9dd70d614b664dc43b529251ed7bd9e633131010d340da4b9"
 dependencies = [
  "convert_case 0.5.0",
  "proc-macro2",
@@ -1921,6 +1921,7 @@ dependencies = [
  "insta",
  "log",
  "meili-snap",
+ "meilisearch-auth",
  "meilisearch-types",
  "nelson",
  "page_size 0.5.0",

Dockerfile

@@ -1,5 +1,7 @@
 # Compile
-FROM rust:bullseye AS compiler
+FROM rust:alpine3.16 AS compiler
+RUN apk add -q --update-cache --no-cache build-base openssl-dev
 WORKDIR /meilisearch
@@ -11,22 +13,20 @@ ENV RUSTFLAGS="-C target-feature=-crt-static"
 COPY . .
 RUN set -eux; \
-    arch="$(dpkg --print-architecture)"; \
-    if [ "$arch" = "arm64" ]; then \
+    apkArch="$(apk --print-arch)"; \
+    if [ "$apkArch" = "aarch64" ]; then \
     export JEMALLOC_SYS_WITH_LG_PAGE=16; \
     fi && \
     cargo build --release
 # Run
-FROM debian:11.6
+FROM alpine:3.16
 ENV MEILI_HTTP_ADDR 0.0.0.0:7700
 ENV MEILI_SERVER_PROVIDER docker
-RUN set -ex; \
-    apt-get update -q; \
-    apt-get install -q -y --no-install-recommends tini; \
-    rm -rf /var/lib/apt/lists/*
+RUN apk update --quiet \
+    && apk add -q --no-cache libgcc tini curl
 # add meilisearch to the `/bin` so you can run it from anywhere and it's easy
 # to find.
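A note on the aarch64 branch above: `JEMALLOC_SYS_WITH_LG_PAGE=16` builds jemalloc with 64 KiB (2^16) pages, since many arm64 Linux kernels run 64 KiB pages and jemalloc refuses to start when its compiled page size is smaller than the runtime one. A sanity-check sketch using the `page_size` crate that already appears in the dependency list (the assertion is illustrative, not part of this diff):

```rust
// Sketch: confirm the runtime page size fits jemalloc's compiled maximum.
// jemalloc built with JEMALLOC_SYS_WITH_LG_PAGE=16 supports pages up to
// 2^16 bytes = 64 KiB.
fn main() {
    let runtime = page_size::get(); // 4096 on most x86_64, often 65536 on arm64
    assert!(
        runtime <= 1 << 16,
        "runtime page size ({runtime} B) exceeds jemalloc's compiled page size"
    );
    println!("runtime page size: {runtime} B");
}
```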

index-scheduler/Cargo.toml

@@ -19,6 +19,7 @@ dump = { path = "../dump" }
 enum-iterator = "1.1.3"
 file-store = { path = "../file-store" }
 log = "0.4.14"
+meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 page_size = "0.5.0"
 roaring = { version = "0.10.0", features = ["serde"] }

index-scheduler/src/autobatcher.rs

@@ -88,11 +88,11 @@ pub enum BatchKind {
     DocumentClear {
         ids: Vec<TaskId>,
     },
-    DocumentOperation {
+    DocumentImport {
         method: IndexDocumentsMethod,
         allow_index_creation: bool,
         primary_key: Option<String>,
-        operation_ids: Vec<TaskId>,
+        import_ids: Vec<TaskId>,
     },
     DocumentDeletion {
         deletion_ids: Vec<TaskId>,
@@ -102,12 +102,12 @@ pub enum BatchKind {
         allow_index_creation: bool,
         settings_ids: Vec<TaskId>,
     },
-    SettingsAndDocumentOperation {
+    SettingsAndDocumentImport {
         settings_ids: Vec<TaskId>,
         method: IndexDocumentsMethod,
         allow_index_creation: bool,
         primary_key: Option<String>,
-        operation_ids: Vec<TaskId>,
+        import_ids: Vec<TaskId>,
     },
     Settings {
         allow_index_creation: bool,
@@ -131,9 +131,9 @@ impl BatchKind {
     #[rustfmt::skip]
     fn allow_index_creation(&self) -> Option<bool> {
         match self {
-            BatchKind::DocumentOperation { allow_index_creation, .. }
+            BatchKind::DocumentImport { allow_index_creation, .. }
             | BatchKind::ClearAndSettings { allow_index_creation, .. }
-            | BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
+            | BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
             | BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
             _ => None,
         }
@@ -141,8 +141,8 @@ impl BatchKind {
     fn primary_key(&self) -> Option<Option<&str>> {
         match self {
-            BatchKind::DocumentOperation { primary_key, .. }
-            | BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
+            BatchKind::DocumentImport { primary_key, .. }
+            | BatchKind::SettingsAndDocumentImport { primary_key, .. } => {
                 Some(primary_key.as_deref())
             }
             _ => None,
@@ -173,22 +173,22 @@ impl BatchKind {
                 if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
             {
                 (
-                    Continue(BatchKind::DocumentOperation {
+                    Continue(BatchKind::DocumentImport {
                         method,
                         allow_index_creation,
                         primary_key: pk,
-                        operation_ids: vec![task_id],
+                        import_ids: vec![task_id],
                     }),
                     allow_index_creation,
                 )
             }
             // if the primary key set in the task was different than ours we should stop and make this batch fail asap.
             K::DocumentImport { method, allow_index_creation, primary_key } => (
-                Break(BatchKind::DocumentOperation {
+                Break(BatchKind::DocumentImport {
                     method,
                     allow_index_creation,
                     primary_key,
-                    operation_ids: vec![task_id],
+                    import_ids: vec![task_id],
                 }),
                 allow_index_creation,
             ),
@@ -249,7 +249,7 @@ impl BatchKind {
             (
                 BatchKind::DocumentClear { mut ids }
                 | BatchKind::DocumentDeletion { deletion_ids: mut ids }
-                | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
+                | BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids }
                 | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
                 K::IndexDeletion,
             ) => {
@@ -258,7 +258,7 @@ impl BatchKind {
             }
             (
                 BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
-                | BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
+                | BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
                 K::IndexDeletion,
             ) => {
                 ids.push(id);
@@ -278,108 +278,63 @@ impl BatchKind {
                 K::DocumentImport { .. } | K::Settings { .. },
             ) => Break(this),
             (
-                BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
+                BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids },
                 K::DocumentClear,
             ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::DocumentClear { ids: operation_ids })
+                ids.push(id);
+                Continue(BatchKind::DocumentClear { ids })
             }
             // we can autobatch the same kind of document additions / updates
             (
-                BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
+                BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut import_ids },
                 K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
             ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::DocumentOperation {
+                import_ids.push(id);
+                Continue(BatchKind::DocumentImport {
                     method: ReplaceDocuments,
                     allow_index_creation,
-                    operation_ids,
+                    import_ids,
                     primary_key: pk,
                 })
             }
             (
-                BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
+                BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
                 K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
             ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::DocumentOperation {
+                import_ids.push(id);
+                Continue(BatchKind::DocumentImport {
                     method: UpdateDocuments,
                     allow_index_creation,
                     primary_key: pk,
-                    operation_ids,
+                    import_ids,
                 })
             }
-            (
-                BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
-                K::DocumentDeletion,
-            ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::DocumentOperation {
-                    method,
-                    allow_index_creation,
-                    primary_key,
-                    operation_ids,
-                })
-            }
             // but we can't autobatch documents if it's not the same kind
             // this match branch MUST be AFTER the previous one
             (
-                this @ BatchKind::DocumentOperation { .. },
-                K::DocumentImport { .. },
+                this @ BatchKind::DocumentImport { .. },
+                K::DocumentDeletion | K::DocumentImport { .. },
             ) => Break(this),
             (
-                BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
+                BatchKind::DocumentImport { method, allow_index_creation, primary_key, import_ids },
                 K::Settings { .. },
-            ) => Continue(BatchKind::SettingsAndDocumentOperation {
+            ) => Continue(BatchKind::SettingsAndDocumentImport {
                 settings_ids: vec![id],
                 method,
                 allow_index_creation,
                 primary_key,
-                operation_ids,
+                import_ids,
             }),
             (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
                 deletion_ids.push(id);
                 Continue(BatchKind::DocumentClear { ids: deletion_ids })
             }
-            // we can autobatch the deletion and import if the index already exists
-            (
-                BatchKind::DocumentDeletion { mut deletion_ids },
-                K::DocumentImport { method, allow_index_creation, primary_key }
-            ) if index_already_exists => {
-                deletion_ids.push(id);
-                Continue(BatchKind::DocumentOperation {
-                    method,
-                    allow_index_creation,
-                    primary_key,
-                    operation_ids: deletion_ids,
-                })
-            }
-            // we can autobatch the deletion and import if both can't create an index
-            (
-                BatchKind::DocumentDeletion { mut deletion_ids },
-                K::DocumentImport { method, allow_index_creation, primary_key }
-            ) if !allow_index_creation => {
-                deletion_ids.push(id);
-                Continue(BatchKind::DocumentOperation {
-                    method,
-                    allow_index_creation,
-                    primary_key,
-                    operation_ids: deletion_ids,
-                })
-            }
-            // we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
-            (
-                this @ BatchKind::DocumentDeletion { .. },
-                K::DocumentImport { .. }
-            ) => {
-                Break(this)
-            }
+            (this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this),
             (BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
                 deletion_ids.push(id);
                 Continue(BatchKind::DocumentDeletion { deletion_ids })
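The match arms above are one step of a fold: each incoming task either extends the current `BatchKind` (`Continue`) or seals the batch so it runs as-is (`Break`). A stripped-down sketch of that control flow, with simplified stand-in kinds instead of the real ones:

```rust
use std::ops::ControlFlow::{self, Break, Continue};

// Simplified stand-ins for the real task and batch kinds.
#[derive(Debug, Clone, Copy)]
enum Kind { Import, Deletion, Settings, IndexDeletion }

#[derive(Debug)]
enum Batch {
    Import { ids: Vec<u32> },
    Deletion { ids: Vec<u32> },
    Settings { ids: Vec<u32> },
}

// One step of the fold: grow the batch while the next task is compatible,
// otherwise freeze it so it gets processed as-is.
fn accumulate(batch: Batch, id: u32, kind: Kind) -> ControlFlow<Batch, Batch> {
    match (batch, kind) {
        (Batch::Import { mut ids }, Kind::Import) => { ids.push(id); Continue(Batch::Import { ids }) }
        (Batch::Deletion { mut ids }, Kind::Deletion) => { ids.push(id); Continue(Batch::Deletion { ids }) }
        (Batch::Settings { mut ids }, Kind::Settings) => { ids.push(id); Continue(Batch::Settings { ids }) }
        // Any other combination (mixed kinds, index deletion, ...) stops here.
        (batch, _) => Break(batch),
    }
}

fn autobatch(tasks: &[(u32, Kind)]) -> Option<Batch> {
    let (&(id, kind), rest) = tasks.split_first()?;
    let mut batch = match kind {
        Kind::Import => Batch::Import { ids: vec![id] },
        Kind::Deletion => Batch::Deletion { ids: vec![id] },
        Kind::Settings => Batch::Settings { ids: vec![id] },
        Kind::IndexDeletion => return None, // handled on its own in this sketch
    };
    for &(id, kind) in rest {
        batch = match accumulate(batch, id, kind) {
            Continue(b) => b,
            Break(b) => return Some(b), // remaining tasks wait for the next batch
        };
    }
    Some(batch)
}

fn main() {
    // Three imports batch together; the trailing settings task is left out.
    let tasks = [(0, Kind::Import), (1, Kind::Import), (2, Kind::Import), (3, Kind::Settings)];
    println!("{:?}", autobatch(&tasks)); // Some(Import { ids: [0, 1, 2] })
}
```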
@@ -448,60 +403,60 @@ impl BatchKind {
                 })
             }
             (
-                BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
+                BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation, primary_key: _ },
                 K::DocumentClear,
             ) => {
-                operation_ids.push(id);
+                other.push(id);
                 Continue(BatchKind::ClearAndSettings {
                     settings_ids,
-                    other: operation_ids,
+                    other,
                     allow_index_creation,
                 })
             }
             (
-                BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
+                BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation, primary_key: _},
                 K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
             ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::SettingsAndDocumentOperation {
+                import_ids.push(id);
+                Continue(BatchKind::SettingsAndDocumentImport {
                     settings_ids,
                     method: ReplaceDocuments,
                     allow_index_creation,
                     primary_key: pk2,
-                    operation_ids,
+                    import_ids,
                 })
             }
             (
-                BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
+                BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
                 K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
             ) => {
-                operation_ids.push(id);
-                Continue(BatchKind::SettingsAndDocumentOperation {
+                import_ids.push(id);
+                Continue(BatchKind::SettingsAndDocumentImport {
                     settings_ids,
                     method: UpdateDocuments,
                     allow_index_creation,
                     primary_key: pk2,
-                    operation_ids,
+                    import_ids,
                 })
             }
             // But we can't batch a settings and a doc op with another doc op
             // this MUST be AFTER the two previous branches
             (
-                this @ BatchKind::SettingsAndDocumentOperation { .. },
+                this @ BatchKind::SettingsAndDocumentImport { .. },
                 K::DocumentDeletion | K::DocumentImport { .. },
             ) => Break(this),
             (
-                BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation, primary_key, operation_ids },
+                BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation, primary_key, import_ids },
                 K::Settings { .. },
             ) => {
                 settings_ids.push(id);
-                Continue(BatchKind::SettingsAndDocumentOperation {
+                Continue(BatchKind::SettingsAndDocumentImport {
                     settings_ids,
                     method,
                     allow_index_creation,
                     primary_key,
-                    operation_ids,
+                    import_ids,
                 })
             }
             (
@@ -633,29 +588,29 @@ mod tests {
     fn autobatch_simple_operation_together() {
         // we can autobatch one or multiple `ReplaceDocuments` together.
         // if the index exists.
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
         // if it doesn't exist.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
         // we can autobatch one or multiple `UpdateDocuments` together.
         // if the index exists.
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
         // if it doesn't exist.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
         // we can autobatch one or multiple DocumentDeletion together
         debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
@@ -673,83 +628,56 @@ mod tests {
         debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
         debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
         debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
-        // We can autobatch document addition with document deletion
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        // And the other way around
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
     }
     #[test]
     fn simple_document_operation_dont_autobatch_with_other() {
         // addition, updates and deletion can't batch together
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
         debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
         debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
         debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
     }
     #[test]
     fn document_addition_batch_with_settings() {
         // simple case
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
         // multiple settings and doc addition
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
         // addition and setting unordered
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
         // We ensure this kind of batch doesn't batch with forbidden operations
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
     }
     #[test]
@@ -861,73 +789,67 @@ mod tests {
         debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
         // The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch with what
         // follows because we first need to process the erroneous batch.
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
     }
     #[test]
     fn allowed_and_disallowed_index_creation() {
         // `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
-        // batch deletion and addition
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
-        debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
     }
     #[test]
     fn autobatch_primary_key() {
         // ==> If I have a pk
         // With a single update
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
         // With multiple updates
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
-        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
+        debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
}
}
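
These assertions use inline snapshots: the `@"…"` literal is the expected `Debug` rendering of the batch the autobatcher produced. A standalone analogue, assuming the `insta` crate that `meili-snap` wraps (the `Batch` struct here is invented for illustration):

```rust
use insta::assert_snapshot;

#[derive(Debug)]
struct Batch {
    operation_ids: Vec<u32>,
}

#[test]
fn inline_snapshot() {
    let batch = Batch { operation_ids: vec![0, 1] };
    // The `@"..."` literal pins the expected rendering inline;
    // `cargo insta review` rewrites it whenever the output changes.
    assert_snapshot!(format!("{batch:?}"), @"Batch { operation_ids: [0, 1] }");
}
```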

View File

@ -28,7 +28,8 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@ -85,21 +86,15 @@ pub(crate) enum Batch {
},
}
#[derive(Debug)]
pub(crate) enum DocumentOperation {
Add(Uuid),
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
pub(crate) enum IndexOperation {
DocumentOperation {
DocumentImport {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
content_files: Vec<Uuid>,
tasks: Vec<Task>,
},
DocumentDeletion {
@ -126,13 +121,13 @@ pub(crate) enum IndexOperation {
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
SettingsAndDocumentOperation {
SettingsAndDocumentImport {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
content_files: Vec<Uuid>,
document_import_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
@ -154,13 +149,13 @@ impl Batch {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
IndexOperation::DocumentImport { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::SettingsAndDocumentOperation {
IndexOperation::SettingsAndDocumentImport {
document_import_tasks: tasks,
settings_tasks: other,
..
@ -174,33 +169,17 @@ impl Batch {
Batch::IndexSwap { task } => vec![task.uid],
}
}
/// Return the index UID associated with this batch
pub fn index_uid(&self) -> Option<&str> {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
IndexOperation::DocumentImport { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
| IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid,
}
}
}
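
Both `index_uid` accessors above lean on the same Rust idiom: variants that all carry a field are folded into a single match arm with `|` patterns. A self-contained sketch of that pattern (variant names invented):

```rust
// Every variant carries `index_uid`, so one `|` pattern covers them all.
enum Op {
    Import { index_uid: String },
    Deletion { index_uid: String },
    Clear { index_uid: String },
}

impl Op {
    fn index_uid(&self) -> &str {
        match self {
            Op::Import { index_uid }
            | Op::Deletion { index_uid }
            | Op::Clear { index_uid } => index_uid,
        }
    }
}

fn main() {
    let op = Op::Clear { index_uid: "doggos".into() };
    assert_eq!(op.index_uid(), "doggos");
}
```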
@ -227,22 +206,17 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
// we want to stop on the first document addition
Some(primary_key.clone())
}
KindWithContent::DocumentDeletion { .. } => None,
_ => unreachable!(),
})
.flatten();
BatchKind::DocumentImport { method, import_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, import_ids)?;
let primary_key = match &tasks[0].kind {
KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => {
primary_key.clone()
}
_ => unreachable!(),
};
let mut documents_counts = Vec::new();
let mut operations = Vec::new();
let mut content_files = Vec::new();
for task in tasks.iter() {
match task.kind {
@ -252,23 +226,19 @@ impl IndexScheduler {
..
} => {
documents_counts.push(documents_count);
operations.push(DocumentOperation::Add(content_file));
}
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents_counts.push(documents_ids.len() as u64);
operations.push(DocumentOperation::Delete(documents_ids.clone()));
content_files.push(content_file);
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
op: IndexOperation::DocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
tasks,
},
must_create_index,
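
The outgoing `DocumentOperation` branch above extracts the primary key with `find_map(..).flatten()`: take the first document-addition task and collapse "no addition found" and "addition without a key" into one `None`. A minimal sketch of that idiom, with `Option<Option<String>>` standing in for the task kinds:

```rust
// `Some(pk)` stands for a document-addition task carrying an optional
// primary key; `None` stands for a deletion task.
fn first_primary_key(tasks: &[Option<Option<String>>]) -> Option<String> {
    // find_map yields the first addition's `Option<String>`,
    // flatten then merges the two layers of `Option`.
    tasks.iter().find_map(|kind| kind.clone()).flatten()
}

fn main() {
    let tasks = vec![None, Some(Some("id".to_string())), Some(None)];
    assert_eq!(first_primary_key(&tasks), Some("id".to_string()));
}
```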
@ -352,12 +322,12 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::SettingsAndDocumentOperation {
BatchKind::SettingsAndDocumentImport {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
} => {
let settings = self.create_next_batch_index(
rtxn,
@ -369,11 +339,11 @@ impl IndexScheduler {
let document_import = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::DocumentOperation {
BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
},
must_create_index,
)?;
@ -382,10 +352,10 @@ impl IndexScheduler {
(
Some(Batch::IndexOperation {
op:
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
primary_key,
documents_counts,
operations,
content_files,
tasks: document_import_tasks,
..
},
@ -396,12 +366,12 @@ impl IndexScheduler {
..
}),
) => Ok(Some(Batch::IndexOperation {
op: IndexOperation::SettingsAndDocumentOperation {
op: IndexOperation::SettingsAndDocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
document_import_tasks,
settings,
settings_tasks,
@ -1017,12 +987,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
index_uid: _,
primary_key,
method,
documents_counts: _,
operations,
documents_counts,
content_files,
mut tasks,
} => {
let mut primary_key_has_been_set = false;
@ -1067,82 +1037,26 @@ impl IndexScheduler {
|| must_stop_processing.get(),
)?;
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let mut results = Vec::new();
for content_uuid in content_files.into_iter() {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
..
}) = task.details
{
received_documents
} else {
// In the case of a `documentAdditionOrUpdate` the details MUST be set
unreachable!();
};
let user_result = match user_result {
Ok(count) => Ok(DocumentAdditionResult {
indexed_documents: count,
number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details.
}),
Err(e) => Err(milli::Error::from(e)),
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
{
provided_ids
} else {
// In the case of a `documentDeletion` the details MUST be set
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
results.push(user_result);
}
if !tasks.iter().all(|res| res.error.is_some()) {
if results.iter().any(|res| res.is_ok()) {
let addition = builder.execute()?;
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
@ -1157,6 +1071,29 @@ impl IndexScheduler {
)?;
}
for (task, (ret, count)) in
tasks.iter_mut().zip(results.into_iter().zip(documents_counts))
{
match ret {
Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: number_of_documents,
indexed_documents: Some(indexed_documents),
});
}
Err(error) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: count,
// if there was an error we indexed 0 documents.
indexed_documents: Some(0),
});
task.error = Some(error.into())
}
}
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
@ -1199,12 +1136,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::SettingsAndDocumentOperation {
IndexOperation::SettingsAndDocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
document_import_tasks,
settings,
settings_tasks,
@ -1222,12 +1159,12 @@ impl IndexScheduler {
let mut import_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
tasks: document_import_tasks,
},
)?;
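
The incoming version above collects one result per content file and then zips them back onto the tasks in a single pass. A simplified sketch of that fan-out (the `Task` fields are invented for illustration):

```rust
#[derive(Debug)]
struct Task {
    status: &'static str,
    indexed_documents: Option<u64>,
}

fn patch_tasks(tasks: &mut [Task], results: Vec<Result<u64, String>>) {
    // One result per task, applied in order.
    for (task, ret) in tasks.iter_mut().zip(results) {
        match ret {
            Ok(count) => {
                task.status = "succeeded";
                task.indexed_documents = Some(count);
            }
            Err(_) => {
                task.status = "failed";
                // mirroring the code above: on error we indexed 0 documents
                task.indexed_documents = Some(0);
            }
        }
    }
}

fn main() {
    let mut tasks = vec![
        Task { status: "enqueued", indexed_documents: None },
        Task { status: "enqueued", indexed_documents: None },
    ];
    patch_tasks(&mut tasks, vec![Ok(3), Err("oops".into())]);
    println!("{tasks:?}");
}
```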

View File

@ -9,11 +9,10 @@ use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use synchronoise::SignalEvent;
use time::OffsetDateTime;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted, BeingResized};
use self::IndexStatus::{Available, BeingDeleted};
use crate::uuid_codec::UuidCodec;
use crate::{clamp_to_page_size, Error, Result};
@ -46,8 +45,6 @@ pub struct IndexMapper {
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// Temporarily do not insert the index in the index map as it is currently being resized.
BeingResized(Arc<SignalEvent>),
/// You can use the index without worrying about anything.
Available(Index),
}
@ -74,10 +71,9 @@ impl IndexMapper {
&self,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.map_size(clamp_to_page_size(self.index_size));
options.max_readers(1024);
if let Some((created, updated)) = date {
@ -106,15 +102,14 @@ impl IndexMapper {
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let index = self.create_or_open_index(&index_path, date, self.index_size)?;
let index = self.create_or_open_index(&index_path, date)?;
wtxn.commit()?;
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
if self.index_map.write().unwrap().insert(uuid, Available(index.clone())).is_some()
if let Some(BeingDeleted) =
self.index_map.write().unwrap().insert(uuid, Available(index.clone()))
{
panic!("Uuid v4 conflict: index with UUID {uuid} already exists.");
panic!("Uuid v4 conflict.");
}
Ok(index)
@ -136,24 +131,14 @@ impl IndexMapper {
wtxn.commit()?;
// We remove the index from the in-memory index map.
let closing_event = loop {
let mut lock = self.index_map.write().unwrap();
let resize_operation = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => break Some(index.prepare_for_closing()),
// The target index is in the middle of a resize operation.
// Wait for this operation to complete, then try again.
Some(BeingResized(resize_operation)) => resize_operation.clone(),
// The index is already being deleted or doesn't exist.
// It's OK to remove it from the map again.
_ => break None,
};
// Avoiding deadlocks: we need to drop the lock before waiting for the end of the resize, which
// will involve operations on the very map we're locking.
drop(lock);
resize_operation.wait();
let mut lock = self.index_map.write().unwrap();
let closing_event = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => Some(index.prepare_for_closing()),
_ => None,
};
drop(lock);
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_name = name.to_string();
@ -186,87 +171,6 @@ impl IndexMapper {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
/// Resizes the maximum size of the specified index to double its current maximum size.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
/// in-memory hash map.
pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
// fixme: factor to a function?
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
// signal that will be sent when the resize operation completes
let resize_operation = Arc::new(SignalEvent::manual(false));
let index = match lock.insert(uuid, BeingResized(resize_operation)) {
Some(Available(index)) => index,
Some(previous_status) => {
lock.insert(uuid, previous_status);
panic!(
"Attempting to resize index {name} that is already being resized or deleted."
)
}
None => {
panic!("Could not find the status of index {name} in the in-memory index mapper.")
}
};
drop(lock);
let resize_succeeded = (move || {
let current_size = index.map_size()?;
let new_size = current_size * 2;
let closing_event = index.prepare_for_closing();
log::debug!("Waiting for index {name} to close");
if !closing_event.wait_timeout(std::time::Duration::from_secs(600)) {
// fail after 10 minutes waiting
panic!("Could not resize index {name} (unable to close it)");
}
log::info!("Resized index {name} from {current_size} to {new_size} bytes");
let index_path = self.base_path.join(uuid.to_string());
let index = self.create_or_open_index(&index_path, None, new_size)?;
Ok(index)
})();
// Put the map back to a consistent state.
// Even if there was an error we don't want to leave the map in an inconsistent state as it would cause
// deadlocks.
let mut lock = self.index_map.write().unwrap();
let (resize_operation, resize_succeeded) = match resize_succeeded {
Ok(index) => {
// insert the resized index
let Some(BeingResized(resize_operation)) = lock.insert(uuid, Available(index)) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Ok(()))
}
Err(error) => {
// there was an error, not much we can do... delete the index from the in-memory map to prevent future errors
let Some(BeingResized(resize_operation)) = lock.remove(&uuid) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Err(error))
}
};
// drop the lock before signaling completion so that other threads don't immediately await on the lock after waking up.
drop(lock);
resize_operation.signal();
resize_succeeded
}
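
The removed resize path coordinates threads with a manual-reset `SignalEvent`: waiters park on `wait()` and a single `signal()` releases all of them at once. A minimal sketch of that handshake, assuming the `synchronoise` crate imported above:

```rust
use std::sync::Arc;
use std::thread;
use synchronoise::SignalEvent;

fn main() {
    // manual-reset event, initially unset, as in the resize code above
    let resize_done = Arc::new(SignalEvent::manual(false));

    let waiter = {
        let resize_done = Arc::clone(&resize_done);
        thread::spawn(move || {
            resize_done.wait(); // parked until the resizer signals
            println!("retrying index lookup");
        })
    };

    // ... the resizing thread closes and reopens the environment here ...

    resize_done.signal(); // wake every waiter at once
    waiter.join().unwrap();
}
```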
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
@ -275,47 +179,31 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
let index = loop {
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = match index {
Some(Available(index)) => index,
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
match index {
Some(Available(index)) => break index,
Some(BeingResized(ref resize_operation)) => {
// Avoiding deadlocks: no lock taken while doing this operation.
resize_operation.wait();
continue;
}
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index =
self.create_or_open_index(&index_path, None, self.index_size)?;
entry.insert(Available(index.clone()));
break index;
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => break index.clone(),
BeingResized(resize_operation) => {
// Avoiding the deadlock: we drop the lock before waiting
let resize_operation = resize_operation.clone();
drop(index_map);
resize_operation.wait();
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
let index = self.create_or_open_index(&index_path, None)?;
entry.insert(Available(index.clone()));
index
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => index.clone(),
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
};
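
The lookup above is double-checked locking in disguise: probe under the read lock, and on a miss re-check through `entry` under the write lock so a concurrent opener that won the race is reused rather than re-opened. A self-contained sketch, with `Arc<String>` standing in for the opened index:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

fn get_or_open(map: &RwLock<HashMap<u128, Arc<String>>>, uuid: u128) -> Arc<String> {
    // fast path: already open, read lock only
    if let Some(index) = map.read().unwrap().get(&uuid).cloned() {
        return index;
    }
    match map.write().unwrap().entry(uuid) {
        // someone opened it between our two lock acquisitions
        Entry::Occupied(entry) => entry.get().clone(),
        Entry::Vacant(entry) => {
            let index = Arc::new(format!("index-{uuid}")); // stand-in for opening the env
            entry.insert(index.clone());
            index
        }
    }
}

fn main() {
    let map = RwLock::new(HashMap::new());
    let a = get_or_open(&map, 42);
    let b = get_or_open(&map, 42);
    assert!(Arc::ptr_eq(&a, &b)); // the second call reused the first open
}
```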

View File

@ -43,7 +43,6 @@ use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli;
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
@ -423,12 +422,12 @@ impl IndexScheduler {
#[cfg(test)]
run.breakpoint(Breakpoint::Init);
run.wake_up.wait();
loop {
run.wake_up.wait();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Ok(0) => (),
Ok(_) => run.wake_up.signal(),
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
@ -441,6 +440,7 @@ impl IndexScheduler {
) {
std::thread::sleep(Duration::from_secs(1));
}
run.wake_up.signal();
}
}
}
@ -630,13 +630,13 @@ impl IndexScheduler {
&self,
rtxn: &RoTxn,
query: &Query,
authorized_indexes: &Option<Vec<IndexUidPattern>>,
filters: &meilisearch_auth::AuthFilter,
) -> Result<RoaringBitmap> {
let mut tasks = self.get_task_ids(rtxn, query)?;
// If the query contains a list of index uids or there is a finite list of authorized indexes,
// then we must exclude all the kinds that aren't associated to one and only one index.
if query.index_uids.is_some() || authorized_indexes.is_some() {
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
tasks -= self.get_kind(rtxn, kind)?;
}
@ -644,11 +644,11 @@ impl IndexScheduler {
// Any task that is internally associated with a non-authorized index
// must be discarded.
if let Some(authorized_indexes) = authorized_indexes {
if !filters.all_indexes_authorized() {
let all_indexes_iter = self.index_tasks.iter(rtxn)?;
for result in all_indexes_iter {
let (index, index_tasks) = result?;
if !authorized_indexes.iter().any(|p| p.matches_str(index)) {
if !filters.is_index_authorized(index) {
tasks -= index_tasks;
}
}
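
The filter above works by subtraction: start from every task matching the query, then remove the task sets of all indexes the key cannot see. A sketch of that shape, assuming the `roaring` crate used for the bitmaps:

```rust
use roaring::RoaringBitmap;

fn filter_authorized(
    mut tasks: RoaringBitmap,
    per_index: Vec<(String, RoaringBitmap)>,
    is_index_authorized: impl Fn(&str) -> bool,
) -> RoaringBitmap {
    for (index, index_tasks) in per_index {
        if !is_index_authorized(&index) {
            tasks -= index_tasks; // discard the tasks of non-authorized indexes
        }
    }
    tasks
}

fn main() {
    let tasks: RoaringBitmap = [0u32, 1, 2].into_iter().collect();
    let per_index = vec![
        ("catto".to_string(), [0u32, 2].into_iter().collect()),
        ("doggo".to_string(), [1u32].into_iter().collect()),
    ];
    let visible = filter_authorized(tasks, per_index, |index| index == "doggo");
    assert_eq!(visible.iter().collect::<Vec<_>>(), vec![1]);
}
```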
@ -668,12 +668,11 @@ impl IndexScheduler {
pub fn get_tasks_from_authorized_indexes(
&self,
query: Query,
authorized_indexes: Option<Vec<IndexUidPattern>>,
filters: &meilisearch_auth::AuthFilter,
) -> Result<Vec<Task>> {
let rtxn = self.env.read_txn()?;
let tasks =
self.get_task_ids_from_authorized_indexes(&rtxn, &query, &authorized_indexes)?;
let tasks = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?;
let tasks = self.get_existing_tasks(
&rtxn,
@ -764,8 +763,8 @@ impl IndexScheduler {
Ok(task)
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
/// Register a new task comming from a dump in the scheduler.
/// By takinig a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
&mut self,
task: TaskDump,
@ -926,7 +925,7 @@ impl IndexScheduler {
/// 5. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
fn tick(&self) -> Result<TickOutcome> {
fn tick(&self) -> Result<usize> {
#[cfg(test)]
{
*self.run_loop_iteration.write().unwrap() += 1;
@ -937,9 +936,8 @@ impl IndexScheduler {
let batch =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(TickOutcome::WaitForSignal),
None => return Ok(0),
};
let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn);
// 1. store the starting date with the bitmap of processing tasks.
@ -1010,23 +1008,7 @@ impl IndexScheduler {
// the `started_at` date times and `processings` of the current processing tasks.
// This date time is used by the task cancelation to store the right `started_at`
// date in the task on disk.
return Ok(TickOutcome::TickAgain(0));
}
// If an index said it was full, we need to:
// 1. identify which index is full
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0));
return Ok(0);
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
@ -1066,7 +1048,7 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks))
Ok(processed_tasks)
}
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
@ -1101,16 +1083,6 @@ impl IndexScheduler {
}
}
/// The outcome of calling the [`IndexScheduler::tick`] function.
pub enum TickOutcome {
/// The scheduler should immediately attempt another `tick`.
///
/// The `usize` field contains the number of processed tasks.
TickAgain(usize),
/// The scheduler should wait for an external signal before attempting another `tick`.
WaitForSignal,
}
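
A condensed sketch of the control flow this enum drives, with closures standing in for the scheduler and its `wake_up` signal: `TickAgain` loops immediately because more tasks may already be enqueued, while `WaitForSignal` parks until a task registration fires the signal.

```rust
enum TickOutcome {
    TickAgain(usize),
    WaitForSignal,
}

fn run_loop(mut tick: impl FnMut() -> TickOutcome, wake_up_wait: impl Fn()) {
    wake_up_wait(); // initial wait, as in `run.wake_up.wait()` above
    loop {
        match tick() {
            TickOutcome::TickAgain(_) => (),              // keep draining the queue
            TickOutcome::WaitForSignal => wake_up_wait(), // block until a new task arrives
        }
    }
}
```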
#[cfg(test)]
mod tests {
use std::io::{BufWriter, Seek, Write};
@ -1120,7 +1092,9 @@ mod tests {
use crossbeam::channel::RecvTimeoutError;
use file_store::File;
use meili_snap::snapshot;
use meilisearch_auth::AuthFilter;
use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::{
ReplaceDocuments, UpdateDocuments,
@ -1706,105 +1680,6 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
}
#[test]
fn document_addition_and_document_deletion() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn document_deletion_and_document_addition() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
// The deletion should have failed because it can't create an index
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
// The addition should work
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn do_not_batch_task_of_different_indexes() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@ -2371,38 +2246,45 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { limit: Some(0), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query { limit: Some(1), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
let query = Query { limit: Some(2), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
let query = Query { from: Some(1), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query { from: Some(2), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
}
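
The snapshots above pin down the `from`/`limit` semantics: tasks come back newest-first, `from` is the highest uid to include, and `limit` keeps only the most recent matches. A sketch reproducing those three expectations (my reading of the snapshots, not the scheduler's code):

```rust
fn page(all_uids: &[u32], from: Option<u32>, limit: Option<usize>) -> Vec<u32> {
    let mut picked: Vec<u32> = all_uids
        .iter()
        .copied()
        .filter(|&uid| from.map_or(true, |from| uid <= from))
        .collect();
    picked.sort_unstable_by(|a, b| b.cmp(a)); // newest first
    if let Some(limit) = limit {
        picked.truncate(limit);
    }
    picked
}

fn main() {
    let uids = [0, 1, 2];
    assert_eq!(page(&uids, None, Some(1)), vec![2]);    // `limit: 1` -> "[2,]"
    assert_eq!(page(&uids, Some(1), None), vec![1, 0]); // `from: 1`  -> "[0,1,]"
    assert_eq!(page(&uids, Some(1), Some(1)), vec![1]); // both       -> "[1,]"
}
```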
@ -2427,21 +2309,24 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
let query = Query {
@ -2449,8 +2334,9 @@ mod tests {
after_started_at: Some(start_time),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test, which should exclude the enqueued tasks
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
@ -2460,8 +2346,9 @@ mod tests {
before_started_at: Some(start_time),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should exclude all of them
snapshot!(snapshot_bitmap(&tasks), @"[]");
@ -2472,8 +2359,9 @@ mod tests {
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should exclude the enqueued tasks and include the only processing task
@ -2498,8 +2386,9 @@ mod tests {
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should include all tasks
@ -2510,8 +2399,9 @@ mod tests {
before_started_at: Some(start_time),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should exclude all tasks
snapshot!(snapshot_bitmap(&tasks), @"[]");
@ -2522,8 +2412,9 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the second part of the test and before one minute after the
// second start of the test, which should exclude all tasks
@ -2541,8 +2432,9 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// we run the same query to verify that, and indeed find that the last task is matched
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@ -2552,8 +2444,9 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// enqueued, succeeded, or processing tasks started after the second part of the test, should
// again only return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@ -2563,8 +2456,9 @@ mod tests {
// now the last task should have failed
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// so running the last query should return nothing
snapshot!(snapshot_bitmap(&tasks), @"[]");
@ -2574,8 +2468,9 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@ -2585,8 +2480,9 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@ -2597,8 +2493,9 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// same query but with an invalid uid
snapshot!(snapshot_bitmap(&tasks), @"[]");
@ -2609,8 +2506,9 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// same query but with a valid uid
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
}
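
The date queries above all reduce to one window test on `started_at`. A sketch with plain integer timestamps, assuming strictly exclusive bounds (the snapshots do not pin down boundary behaviour, so treat that as an assumption):

```rust
fn started_in_window(started_at: i64, after: Option<i64>, before: Option<i64>) -> bool {
    // an unset bound matches everything on that side
    after.map_or(true, |a| started_at > a) && before.map_or(true, |b| started_at < b)
}

fn main() {
    let start_time = 1_000;
    // started exactly at `start_time` is excluded under strict bounds
    assert!(!started_in_window(start_time, Some(start_time), None));
    // started after the test start and before one "minute" later matches
    assert!(started_in_window(1_030, Some(start_time), Some(1_060)));
}
```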
@ -2640,8 +2538,9 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
@ -2650,7 +2549,9 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
)
.unwrap();
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
@ -2662,7 +2563,9 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
@ -2674,10 +2577,14 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]),
&AuthFilter::with_allowed_indexes(
vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]
.into_iter()
.collect(),
),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
@ -2685,8 +2592,9 @@ mod tests {
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query::default();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// we asked for all the tasks with all indexes authorized -> all tasks returned
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
}
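
`AuthFilter::with_allowed_indexes` takes `IndexUidPattern`s rather than plain names. A sketch of the matching it plausibly performs, assuming exact names plus a trailing `*` wildcard (the real `IndexUidPattern` implementation may differ):

```rust
fn pattern_matches(pattern: &str, index: &str) -> bool {
    match pattern.strip_suffix('*') {
        Some(prefix) => index.starts_with(prefix), // wildcard: prefix match
        None => pattern == index,                  // otherwise: exact match
    }
}

fn main() {
    assert!(pattern_matches("doggo", "doggo"));
    assert!(pattern_matches("dog*", "doggos"));
    assert!(!pattern_matches("doggo", "catto"));
}
```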
@ -2717,8 +2625,9 @@ mod tests {
let rtxn = index_scheduler.read_txn().unwrap();
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
// 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
// taskCancelation itself
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
@ -2728,7 +2637,9 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
)
.unwrap();
// Return only 1 because the user is not authorized to see task 2

View File

@ -1,42 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,9 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 3,
"doggo": "bork"
}
]

View File

@ -1,37 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,17 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 1,
"doggo": "jean bob"
},
{
"id": 2,
"catto": "jorts"
},
{
"id": 3,
"doggo": "bork"
}
]

View File

@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@ -439,29 +439,20 @@ impl IndexScheduler {
provided_ids: received_document_ids,
deleted_documents,
} => {
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, documents_ids) =
if let KindWithContent::DocumentDeletion {
ref index_uid,
ref documents_ids,
} = kind
{
(index_uid, documents_ids)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
if let Some(deleted_documents) = deleted_documents {
assert_eq!(status, Status::Succeeded);
assert!(deleted_documents <= received_document_ids as u64);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.unwrap() <= received_document_ids as u64);
assert!(documents_ids.len() == received_document_ids);
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
assert!(documents_ids.len() == received_document_ids);
match &kind {
KindWithContent::DocumentDeletion { index_uid, documents_ids } => {
assert_eq!(&task_index_uid.unwrap(), index_uid);
assert!(documents_ids.len() >= received_document_ids);
}
_ => panic!(),
}
} else {
assert_ne!(status, Status::Succeeded);
}
}
Details::ClearAll { deleted_documents } => {

View File

@ -85,17 +85,13 @@ impl AuthController {
uid: Uuid,
search_rules: Option<SearchRules>,
) -> Result<AuthFilter> {
let mut filters = AuthFilter::default();
let key = self.get_key(uid)?;
filters.search_rules = match search_rules {
Some(search_rules) => search_rules,
None => SearchRules::Set(key.indexes.into_iter().collect()),
};
let key_authorized_indexes = SearchRules::Set(key.indexes.into_iter().collect());
filters.allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
let allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
Ok(filters)
Ok(AuthFilter { search_rules, key_authorized_indexes, allow_index_creation })
}
pub fn list_keys(&self) -> Result<Vec<Key>> {
@ -160,13 +156,59 @@ impl AuthController {
}
pub struct AuthFilter {
pub search_rules: SearchRules,
pub allow_index_creation: bool,
search_rules: Option<SearchRules>,
key_authorized_indexes: SearchRules,
allow_index_creation: bool,
}
impl Default for AuthFilter {
fn default() -> Self {
Self { search_rules: SearchRules::default(), allow_index_creation: true }
Self {
search_rules: None,
key_authorized_indexes: SearchRules::default(),
allow_index_creation: true,
}
}
}
impl AuthFilter {
#[inline]
pub fn allow_index_creation(&self) -> bool {
self.allow_index_creation
}
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
Self {
search_rules: None,
key_authorized_indexes: SearchRules::Set(allowed_indexes),
allow_index_creation: false,
}
}
pub fn all_indexes_authorized(&self) -> bool {
self.key_authorized_indexes.all_indexes_authorized()
&& self
.search_rules
.as_ref()
.map(|search_rules| search_rules.all_indexes_authorized())
.unwrap_or(true)
}
pub fn is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
&& self
.search_rules
.as_ref()
.map(|search_rules| search_rules.is_index_authorized(index))
.unwrap_or(true)
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
if !self.is_index_authorized(index) {
return None;
}
let search_rules = self.search_rules.as_ref().unwrap_or(&self.key_authorized_indexes);
search_rules.get_index_search_rules(index)
}
}
@ -185,7 +227,7 @@ impl Default for SearchRules {
}
impl SearchRules {
pub fn is_index_authorized(&self, index: &str) -> bool {
fn is_index_authorized(&self, index: &str) -> bool {
match self {
Self::Set(set) => {
set.contains("*")
@ -200,7 +242,7 @@ impl SearchRules {
}
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
match self {
Self::Set(_) => {
if self.is_index_authorized(index) {
@ -219,24 +261,10 @@ impl SearchRules {
}
}
/// Return the list of indexes such that `self.is_index_authorized(index) == true`,
/// or `None` if all indexes satisfy this condition.
pub fn authorized_indexes(&self) -> Option<Vec<IndexUidPattern>> {
fn all_indexes_authorized(&self) -> bool {
match self {
SearchRules::Set(set) => {
if set.contains("*") {
None
} else {
Some(set.iter().cloned().collect())
}
}
SearchRules::Map(map) => {
if map.contains_key("*") {
None
} else {
Some(map.keys().cloned().collect())
}
}
SearchRules::Set(set) => set.contains("*"),
SearchRules::Map(map) => map.contains_key("*"),
}
}
}
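
The reworked `AuthFilter` composes two nested gates: the API key's indexes always apply, and tenant-token search rules, when present, can only narrow that scope, never widen it. Below is a minimal standalone sketch of the combined check, using plain `HashSet<String>` in place of `SearchRules` and ignoring the `Map` variant and index patterns; all names are illustrative, not part of the diff.

use std::collections::HashSet;

// Sketch of the logic behind `AuthFilter::is_index_authorized`: the key
// defines the outer boundary, the tenant token may only restrict it.
fn is_authorized(
    key_indexes: &HashSet<String>,
    token_indexes: Option<&HashSet<String>>,
    index: &str,
) -> bool {
    let key_ok = key_indexes.contains("*") || key_indexes.contains(index);
    let token_ok = token_indexes
        .map(|set| set.contains("*") || set.contains(index))
        .unwrap_or(true); // no tenant token: the key alone decides
    key_ok && token_ok
}

This is also why `get_index_search_rules` falls back to `key_authorized_indexes` when no tenant token was provided: the key's own index set is the effective rule set.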

View File

@ -15,7 +15,7 @@ actix-web = { version = "4.2.1", default-features = false }
anyhow = "1.0.65"
convert_case = "0.6.0"
csv = "1.1.6"
deserr = "0.5.0"
deserr = "0.4.1"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }

View File

@ -19,7 +19,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
pub enum PayloadType {
Ndjson,
Json,
Csv { delimiter: u8 },
Csv,
}
impl fmt::Display for PayloadType {
@ -27,7 +27,7 @@ impl fmt::Display for PayloadType {
match self {
PayloadType::Ndjson => f.write_str("ndjson"),
PayloadType::Json => f.write_str("json"),
PayloadType::Csv { .. } => f.write_str("csv"),
PayloadType::Csv => f.write_str("csv"),
}
}
}
@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let csv = csv::Reader::from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;

View File

@ -220,7 +220,6 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;

View File

@ -25,7 +25,7 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", "
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
deserr = "0.5.0"
deserr = "0.4.1"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
@ -116,5 +116,5 @@ japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.6/build.zip"
sha1 = "dce0aba16bceab5549edf9f01de89858800f7422"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.5/build.zip"
sha1 = "6fe959b78511b32e9ff857fd9fd31740633b9fce"

View File

@ -26,6 +26,18 @@ impl SearchAggregator {
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
#[allow(dead_code)]
impl MultiSearchAggregator {
pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@ -43,6 +55,7 @@ impl Analytics for MockAnalytics {
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

View File

@ -23,6 +23,8 @@ use crate::routes::tasks::TasksFilterQuery;
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@ -30,6 +32,8 @@ pub type SearchAggregator = mock_analytics::SearchAggregator;
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@ -74,6 +78,9 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
// this method should be called to aggregate an add documents request
fn add_documents(
&self,

View File

@ -9,7 +9,7 @@ use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, SearchRules};
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_types::InstanceUid;
use once_cell::sync::Lazy;
use regex::Regex;
@ -30,7 +30,7 @@ use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;
@ -68,6 +68,7 @@ pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@ -133,6 +134,7 @@ impl SegmentAnalytics {
opt: opt.clone(),
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@ -174,6 +176,10 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
@ -324,6 +330,7 @@ pub struct Segment {
batcher: AutoBatcher,
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@ -381,6 +388,7 @@ impl Segment {
Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@ -399,7 +407,7 @@ impl Segment {
auth_controller: AuthController,
) {
if let Ok(stats) =
create_all_stats(index_scheduler.into(), auth_controller, &SearchRules::default())
create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
{
// Replace the version number with the prototype name if any.
let version = if let Some(prototype) = crate::prototype_name() {
@ -426,6 +434,8 @@ impl Segment {
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
@ -443,6 +453,9 @@ impl Segment {
if let Some(post_search) = post_search {
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
let _ = self.batcher.push(add_documents).await;
}
@ -716,6 +729,118 @@ impl SearchAggregator {
}
}
#[derive(Default)]
pub struct MultiSearchAggregator {
timestamp: Option<OffsetDateTime>,
// requests
total_received: usize,
total_succeeded: usize,
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
total_distinct_index_count: usize,
// number of queries with a single index, use with total_received to compute a proportion
total_single_index: usize,
// sum of the number of search queries in the requests, use with total_received to compute an average
total_search_count: usize,
// context
user_agents: HashSet<String>,
}
impl MultiSearchAggregator {
pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
let timestamp = Some(OffsetDateTime::now_utc());
let user_agents = extract_user_agents(request).into_iter().collect();
let distinct_indexes: HashSet<_> =
query.iter().map(|query| query.index_uid.as_str()).collect();
Self {
timestamp,
total_received: 1,
total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: query.len(),
user_agents,
}
}
pub fn succeed(&mut self) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
}
pub fn aggregate(&mut self, other: Self) {
// write the aggregate in a way that will cause a compilation error if a field is added.
// get ownership of self, replacing it with a default value.
let this = std::mem::take(self);
let timestamp = this.timestamp.or(other.timestamp);
let total_received = this.total_received.saturating_add(other.total_received);
let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded);
let total_distinct_index_count =
this.total_distinct_index_count.saturating_add(other.total_distinct_index_count);
let total_single_index = this.total_single_index.saturating_add(other.total_single_index);
let total_search_count = this.total_search_count.saturating_add(other.total_search_count);
let mut user_agents = this.user_agents;
for user_agent in other.user_agents.into_iter() {
user_agents.insert(user_agent);
}
// need all fields or compile error
let mut aggregated = Self {
timestamp,
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
user_agents,
// do not add _ or ..Default::default() here
};
// replace the default self with the aggregated value
std::mem::swap(self, &mut aggregated);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics
"total_received": self.total_received,
},
"indexes": {
"total_single_index": self.total_single_index,
"total_distinct_index_count": self.total_distinct_index_count,
"avg_distinct_index_count": (self.total_distinct_index_count as f64) / (self.total_received as f64), // not 0 else returned early
},
"searches": {
"total_search_count": self.total_search_count,
"avg_search_count": (self.total_search_count as f64) / (self.total_received as f64),
}
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
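
As a quick sanity check of the averages computed in `into_event`, here is a worked example with invented numbers, aggregating a request of three queries over two distinct indexes with a request of one query over one index:

// Hypothetical aggregation of two multi-search requests:
// request A: 3 queries over 2 distinct indexes; request B: 1 query over 1 index.
let total_received = 1 + 1;
let total_search_count = 3 + 1;
let total_distinct_index_count = 2 + 1;
let total_single_index = 0 + 1; // only request B targeted a single index
assert_eq!(total_search_count as f64 / total_received as f64, 2.0); // avg_search_count
assert_eq!(total_distinct_index_count as f64 / total_received as f64, 1.5); // avg_distinct_index_count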
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

View File

@ -11,8 +11,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@ -54,7 +52,6 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,

View File

@ -136,6 +136,13 @@ pub mod policies {
use crate::extractors::authentication::Policy;
enum TenantTokenOutcome {
NotATenantToken,
Invalid,
Expired,
Valid(Uuid, SearchRules),
}
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
@ -164,29 +171,42 @@ pub mod policies {
pub struct ActionPolicy<const A: u8>;
impl<const A: u8> Policy for ActionPolicy<A> {
/// Attempts to grant authentication from a bearer token (that can be a tenant token or an API key), the requested Action,
/// and a list of requested indexes.
///
/// If the bearer token is not allowed for the specified indexes and action, returns `None`.
/// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
/// (that may contain more indexes than requested).
fn authenticate(
auth: AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// authenticate if token is the master key.
// master key can only have access to keys routes.
// if master key is None only keys routes are inaccessible.
// Without a master key, all routes are accessible except the key-related routes.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Some(AuthFilter::default());
}
// Tenant token
if let Some(filters) = ActionPolicy::<A>::authenticate_tenant_token(&auth, token, index)
{
return Some(filters);
} else if let Some(action) = Action::from_repr(A) {
// API key
if let Ok(Some(uid)) = auth.get_optional_uid_from_encoded_key(token.as_bytes()) {
if let Ok(true) = auth.is_key_authorized(uid, action, index) {
return auth.get_key_filters(uid, None).ok();
let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
(key_uuid, Some(search_rules))
}
}
TenantTokenOutcome::Expired => return None,
TenantTokenOutcome::Invalid => return None,
TenantTokenOutcome::NotATenantToken => {
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
}
};
// check that the indexes are allowed
let action = Action::from_repr(A)?;
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
{
return Some(auth_filter);
}
None
@ -194,50 +214,43 @@ pub mod policies {
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// A tenant token only has access to the search route which always defines an index.
let index = index?;
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return None;
return TenantTokenOutcome::NotATenantToken;
}
let uid = extract_key_id(token)?;
// check if parent key is authorized to do the action.
if auth.is_key_authorized(uid, Action::Search, Some(index)).ok()? {
// Check if tenant token is valid.
let key = auth.generate_key(uid)?;
let data = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
)
.ok()?;
let uid = if let Some(uid) = extract_key_id(token) {
uid
} else {
return TenantTokenOutcome::NotATenantToken;
};
// Check index access if an index restriction is provided.
if !data.claims.search_rules.is_index_authorized(index) {
return None;
// Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) {
key
} else {
return TenantTokenOutcome::Invalid;
};
let data = if let Ok(data) = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
) {
data
} else {
return TenantTokenOutcome::Invalid;
};
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return TenantTokenOutcome::Expired;
}
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return None;
}
}
return match auth.get_key_filters(uid, Some(data.claims.search_rules)) {
Ok(auth) if auth.search_rules.is_index_authorized(index) => Some(auth),
_ => None,
};
}
None
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
}
}

View File

@ -10,10 +10,10 @@ use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@ -67,7 +67,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
@ -156,31 +156,16 @@ pub async fn get_all_documents(
}
#[derive(Deserialize, Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
pub primary_key: Option<String>,
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidIndexCsvDelimiter>, error = DeserrQueryParamError<InvalidIndexCsvDelimiter>)]
pub csv_delimiter: Option<u8>,
}
fn from_char_csv_delimiter(
c: char,
) -> Result<Option<u8>, DeserrQueryParamError<InvalidIndexCsvDelimiter>> {
if c.is_ascii() {
Ok(Some(c as u8))
} else {
Err(DeserrQueryParamError::new(
format!("csv delimiter must be an ascii character. Found: `{}`", c),
Code::InvalidIndexCsvDelimiter,
))
}
}
pub async fn replace_documents(
pub async fn add_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -192,13 +177,12 @@ pub async fn replace_documents(
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
@ -211,7 +195,7 @@ pub async fn replace_documents(
pub async fn update_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@ -219,17 +203,15 @@ pub async fn update_documents(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
params.into_inner().primary_key,
body,
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
@ -239,43 +221,26 @@ pub async fn update_documents(
Ok(HttpResponse::Accepted().json(task))
}
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
) {
(Some(("application", "json")), None) => PayloadType::Json,
(Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
(Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
(Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
(Some(("application", "json")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/json",
)))
}
(Some(("application", "x-ndjson")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/x-ndjson",
)))
}
(Some((type_, subtype)), _) => {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
(None, _) => {
None => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
@ -320,9 +285,7 @@ async fn document_addition(
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.

View File

@ -89,11 +89,11 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &index_scheduler.filters().search_rules;
let filters = index_scheduler.filters();
let indexes: Vec<_> = index_scheduler.indexes()?;
let indexes = indexes
.into_iter()
.filter(|(name, _)| search_rules.is_index_authorized(name))
.filter(|(name, _)| filters.is_index_authorized(name))
.map(|(name, index)| IndexView::new(name, &index))
.collect::<Result<Vec<_>, _>>()?;
@ -120,7 +120,8 @@ pub async fn create_index(
) -> Result<HttpResponse, ResponseError> {
let IndexCreateRequest { primary_key, uid } = body.into_inner();
let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid);
// FIXME: allow_index_creation?
let allow_index_creation = index_scheduler.filters().is_index_authorized(&uid);
if allow_index_creation {
analytics.publish(
"Index Created".to_string(),

View File

@ -3,7 +3,6 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@ -17,9 +16,9 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -101,26 +100,6 @@ impl From<SearchQueryGet> for SearchQuery {
}
}
/// Incorporate search rules in search query
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transform the sort query parameter into something that matches the post expected format.
@ -159,9 +138,7 @@ pub async fn search_with_url_query(
let mut query: SearchQuery = params.into_inner().into();
// Tenant token search_rules.
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query, search_rules);
}
@ -193,9 +170,7 @@ pub async fn search_with_post(
debug!("search called with params: {:?}", query);
// Tenant token search_rules.
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
add_search_rules(&mut query, search_rules);
}

View File

@ -45,7 +45,7 @@ macro_rules! make_setting_route {
let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
@ -86,7 +86,7 @@ macro_rules! make_setting_route {
..Default::default()
};
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
@ -560,7 +560,7 @@ pub async fn update_all(
Some(&req),
);
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
@ -596,7 +596,7 @@ pub async fn delete_all(
let new_settings = Settings::cleared().into_unchecked();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,

View File

@ -33,6 +33,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(get_stats)))
.service(web::resource("/version").route(web::get().to(get_version)))
.service(web::scope("/indexes").configure(indexes::configure))
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure));
}
@ -237,10 +238,9 @@ async fn get_stats(
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
let search_rules = &index_scheduler.filters().search_rules;
let filters = index_scheduler.filters();
let stats =
create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), search_rules)?;
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
@ -249,19 +249,19 @@ async fn get_stats(
pub fn create_all_stats(
index_scheduler: Data<IndexScheduler>,
auth_controller: AuthController,
search_rules: &meilisearch_auth::SearchRules,
filters: &meilisearch_auth::AuthFilter,
) -> Result<Stats, ResponseError> {
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let processing_task = index_scheduler.get_tasks_from_authorized_indexes(
Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
search_rules.authorized_indexes(),
filters,
)?;
// accumulate the size of each index
let processing_index = processing_task.first().and_then(|task| task.index_uid());
for (name, index) in index_scheduler.indexes()? {
if !search_rules.is_index_authorized(&name) {
if !filters.is_index_authorized(&name) {
continue;
}
@ -327,3 +327,5 @@ pub async fn get_health(
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
mod multi_search;

View File

@ -0,0 +1,87 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
}
#[derive(Serialize)]
struct SearchResults {
results: Vec<SearchResultWithIndex>,
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueries {
queries: Vec<SearchQueryWithIndex>,
}
pub async fn multi_search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
params: AwebJson<SearchQueries, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let search_results: Result<_, ResponseError> = (|| {
async {
let mut search_results = Vec::with_capacity(queries.len());
for (index_uid, mut query) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query)
{
debug!("search called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken.into());
}
// Apply search rules from tenant token
if let Some(search_rules) =
index_scheduler.filters().get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result?,
});
}
Ok(search_results)
}
})()
.await;
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
let search_results = search_results?;
debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}
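
From a test's point of view, a call to the new route could look like the sketch below; the index names and queries are invented, and `Server::multi_search` is the test helper added further down in this diff. Note that authorization is all-or-nothing: if any query targets an index the key cannot access, the whole request is rejected before any search runs.

// Hypothetical integration-test usage of POST /multi-search:
let (response, code) = server
    .multi_search(json!({
        "queries": [
            { "indexUid": "products", "q": "shoes" },
            { "indexUid": "brands", "q": "nike", "limit": 5 }
        ]
    }))
    .await;
assert_eq!(code, 200);
// One result per query, in order, each tagged with its index:
// { "results": [ { "indexUid": "products", "hits": [...], ... },
//                { "indexUid": "brands", "hits": [...], ... } ] }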

View File

@ -42,7 +42,7 @@ pub async fn swap_indexes(
}),
Some(&req),
);
let search_rules = &index_scheduler.filters().search_rules;
let filters = index_scheduler.filters();
let mut swaps = vec![];
for SwapIndexesPayload { indexes } in params.into_iter() {
@ -53,7 +53,7 @@ pub async fn swap_indexes(
return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
}
};
if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) {
if !filters.is_index_authorized(lhs) || !filters.is_index_authorized(rhs) {
return Err(AuthenticationError::InvalidToken.into());
}
swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()) });

View File

@ -319,7 +319,7 @@ async fn cancel_tasks(
let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
&index_scheduler.filters().search_rules.authorized_indexes(),
index_scheduler.filters(),
)?;
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
@ -364,7 +364,7 @@ async fn delete_tasks(
let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
&index_scheduler.filters().search_rules.authorized_indexes(),
index_scheduler.filters(),
)?;
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
@ -398,10 +398,7 @@ async fn get_tasks(
let query = params.into_query();
let mut tasks_results: Vec<TaskView> = index_scheduler
.get_tasks_from_authorized_indexes(
query,
index_scheduler.filters().search_rules.authorized_indexes(),
)?
.get_tasks_from_authorized_indexes(query, index_scheduler.filters())?
.into_iter()
.map(|t| TaskView::from_task(&t))
.collect();
@ -439,12 +436,8 @@ async fn get_task(
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
if let Some(task) = index_scheduler
.get_tasks_from_authorized_indexes(
query,
index_scheduler.filters().search_rules.authorized_indexes(),
)?
.first()
if let Some(task) =
index_scheduler.get_tasks_from_authorized_indexes(query, index_scheduler.filters())?.first()
{
let task_view = TaskView::from_task(task);
Ok(HttpResponse::Ok().json(task_view))

View File

@ -5,8 +5,10 @@ use std::time::Instant;
use deserr::Deserr;
use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@ -74,6 +76,100 @@ impl SearchQuery {
}
}
/// A `SearchQuery` + an index UID.
// This struct contains the fields of `SearchQuery` inline.
// This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`.
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryWithIndex {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
pub index_uid: IndexUid,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
impl SearchQueryWithIndex {
pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
let SearchQueryWithIndex {
index_uid,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = self;
(
index_uid,
SearchQuery {
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
)
}
}
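
The `// do not use ..Default::default()` comments here and in `MultiSearchAggregator::aggregate` rely on the same compile-time exhaustiveness trick, illustrated below with a hypothetical struct:

// Destructuring without a `..` rest pattern: if a field is later added to
// `Query`, this `let` stops compiling, forcing the author to handle it here.
struct Query {
    q: Option<String>,
    limit: usize,
}

fn split(query: Query) -> (Option<String>, usize) {
    let Query { q, limit } = query;
    (q, limit)
}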
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
@ -120,6 +216,14 @@ pub struct SearchResult {
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
pub index_uid: String,
#[serde(flatten)]
pub result: SearchResult,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum HitsInfo {
@ -129,6 +233,26 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
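
For example (filter values invented for illustration), the `(Some(filter), Some(rules_filter))` arm coerces each side to an array and concatenates them; at the top level of a Meilisearch filter, array elements are ANDed together, so the tenant token's restriction is enforced on top of whatever the caller asked for:

use serde_json::{json, Value};

// Hypothetical merge performed by `add_search_rules`:
let filter = json!("genre = horror"); // user-supplied filter
let rules_filter = json!(["user_id = 1"]); // forced by the tenant token
let merged = Value::Array(
    [vec![filter], rules_filter.as_array().unwrap().clone()].concat(),
);
assert_eq!(merged, json!(["genre = horror", "user_id = 1"]));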
pub fn perform_search(
index: &Index,
query: SearchQuery,

View File

@ -11,6 +11,7 @@ use crate::common::Server;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
Lazy::new(|| {
let mut authorizations = hashmap! {
("POST", "/multi-search") => hashset!{"search", "*"},
("POST", "/indexes/products/search") => hashset!{"search", "*"},
("GET", "/indexes/products/search") => hashset!{"search", "*"},
("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"},

View File

@ -4,6 +4,8 @@ mod errors;
mod payload;
mod tenant_token;
mod tenant_token_multi_search;
use actix_web::http::StatusCode;
use serde_json::{json, Value};

File diff suppressed because it is too large

View File

@ -30,7 +30,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.json"),
vec![("content-type", "application/json")],
("content-type", "application/json"),
)
.await;
assert_eq!(code, 202);
@ -46,7 +46,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.ndjson"),
vec![("content-type", "application/x-ndjson")],
("content-type", "application/x-ndjson"),
)
.await;
assert_eq!(code, 202);
@ -96,21 +96,6 @@ impl Index<'_> {
self.service.post_encoded(url, documents, self.encoder).await
}
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
}
pub async fn update_documents(
&self,
documents: Value,
@ -125,21 +110,6 @@ impl Index<'_> {
self.service.put_encoded(url, documents, self.encoder).await
}
pub async fn raw_update_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.put_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.put_str(url, payload, Vec::new()).await
}
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);

View File

@ -103,6 +103,10 @@ impl Server {
Index { uid: uid.as_ref().to_string(), service: &self.service, encoder }
}
pub async fn multi_search(&self, queries: Value) -> (Value, StatusCode) {
self.service.post("/multi-search", queries).await
}
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
self.service.get(format!("/indexes{parameters}")).await
}

View File

@ -34,18 +34,17 @@ impl Service {
self.request(req).await
}
/// Send a test post request from a text body.
/// Send a test post request from a text body, with a single (name, value) header, typically a content-type.
pub async fn post_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
header: (&str, &str),
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::post().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
let req = test::TestRequest::post()
.uri(url.as_ref())
.set_payload(body.as_ref().to_string())
.insert_header(header);
self.request(req).await
}
@ -58,21 +57,6 @@ impl Service {
self.put_encoded(url, body, Encoder::Plain).await
}
/// Send a test put request from a text body.
pub async fn put_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::put().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
self.request(req).await
}
pub async fn put_encoded(
&self,
url: impl AsRef<str>,

View File

@ -216,133 +216,6 @@ async fn add_single_document_with_every_encoding() {
}
}
#[actix_rt::test]
async fn add_csv_document() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id,name,race
0,jean,bernese mountain
1,jorts,orange cat";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id|name|race
0|jean|bernese mountain
1|jorts|orange cat";
let (response, code) =
index.raw_update_documents(document, Some("text/csv"), "?csvDelimiter=|").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// any other content-type must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {
@ -1154,53 +1027,6 @@ async fn error_document_field_limit_reached() {
@"");
}
#[actix_rt::test]
async fn add_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
index.add_documents(documents, None).await;
let response = index.wait_task(1).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn add_documents_invalid_geo_field() {
let server = Server::new().await;


@ -1,6 +1,5 @@
use meili_snap::*;
use serde_json::json;
use urlencoding::encode;
use crate::common::Server;
@ -98,323 +97,3 @@ async fn delete_documents_batch() {
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_update_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_update_documents(
"",
Some("application/json"),
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}


@ -3,6 +3,7 @@
mod errors;
mod formatted;
mod multi;
mod pagination;
use once_cell::sync::Lazy;


@ -0,0 +1,343 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use super::{DOCUMENTS, NESTED_DOCUMENTS};
use crate::common::Server;
#[actix_rt::test]
async fn search_empty_list() {
let server = Server::new().await;
let (response, code) = server.multi_search(json!({"queries": []})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": []
}
"###);
}
#[actix_rt::test]
async fn search_json_object() {
let server = Server::new().await;
let (response, code) = server.multi_search(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Missing field `queries`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn search_json_array() {
let server = Server::new().await;
let (response, code) = server.multi_search(json!([])).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn simple_search_single_index() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "test", "q": "captain"},
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
"hits": [
{
"title": "Glass",
"id": "450465"
}
],
"query": "glass",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
},
{
"indexUid": "test",
"hits": [
{
"title": "Captain Marvel",
"id": "299537"
}
],
"query": "captain",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
]
"###);
}
#[actix_rt::test]
async fn simple_search_missing_index_uid() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"q": "glass"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Missing field `indexUid` inside `.queries[0]`",
"code": "missing_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_index_uid"
}
"###);
}
#[actix_rt::test]
async fn simple_search_illegal_index_uid() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid": "hé", "q": "glass"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
}
"###);
}
#[actix_rt::test]
async fn simple_search_two_indexes() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
"hits": [
{
"title": "Glass",
"id": "450465"
}
],
"query": "glass",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
},
{
"indexUid": "nested",
"hits": [
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pesti"
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8
}
],
"cattos": [
"simba",
"pestiféré"
]
}
],
"query": "pesti",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
]
"###);
}
#[actix_rt::test]
async fn search_one_index_doesnt_exist() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `nested` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
}
#[actix_rt::test]
async fn search_multiple_indexes_dont_exist() {
let server = Server::new().await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
}
#[actix_rt::test]
async fn search_one_query_error() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
}
"###);
}
#[actix_rt::test]
async fn search_multiple_query_errors() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
{"indexUid": "nested", "q": "pesti", "facets": ["doggos"]},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
}
"###);
}


@ -19,7 +19,7 @@ byteorder = "1.4.3"
charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
deserr = "0.5.0"
deserr = "0.4.1"
either = "1.8.0"
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"


@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@ -427,26 +427,8 @@ pub fn snap_settings(index: &Index) -> String {
snap
}
pub fn snap_documents(index: &Index) -> String {
let mut snap = String::new();
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let display = fields_ids_map.ids().collect::<Vec<_>>();
for document in index.all_documents(&rtxn).unwrap() {
let doc = obkv_to_json(&display, &fields_ids_map, document.unwrap().1).unwrap();
snap.push_str(&serde_json::to_string(&doc).unwrap());
snap.push('\n');
}
snap
}
#[macro_export]
macro_rules! full_snap_of_db {
($index:ident, documents) => {{
$crate::snapshot_tests::snap_documents(&$index)
}};
($index:ident, settings) => {{
$crate::snapshot_tests::snap_settings(&$index)
}};


@ -395,7 +395,6 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
(Some(_), None) => Ok(Err(MissingLongitude { document_id: debug_id() })),
(None, None) => Ok(Err(MissingLatitudeAndLongitude { document_id: debug_id() })),
},
Value::Null => Ok(Ok(())),
value => Ok(Err(NotAnObject { document_id: debug_id(), value })),
}
}


@ -59,7 +59,6 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)


@ -6,7 +6,6 @@ use roaring::RoaringBitmap;
use super::read_u32_ne_bytes;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
@ -58,6 +57,21 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
Ok(obkvs.last().unwrap().clone())
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
.iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current);
let mut buffer = Vec::new();
merge_two_obkvs(first, second, &mut buffer);
Cow::from(buffer)
})
.unwrap())
}
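
A minimal sketch of the merge order, assuming merge_two_obkvs (whose body is elided above) keeps the more recent value when both sides contain a key:

use std::borrow::Cow;

// Two hypothetical one-field documents: doc_a = {0 => "a"}, doc_b = {0 => "b"}.
let mut doc_a = Vec::new();
let mut writer = obkv::KvWriter::new(&mut doc_a);
writer.insert(0_u16, b"a").unwrap();
writer.finish().unwrap();

let mut doc_b = Vec::new();
let mut writer = obkv::KvWriter::new(&mut doc_b);
writer.insert(0_u16, b"b").unwrap();
writer.finish().unwrap();

// doc_b was seen last, so its value for key 0 survives the merge.
let merged = merge_obkvs(b"", &[Cow::from(doc_a.as_slice()), Cow::from(doc_b.as_slice())]).unwrap();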
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@ -74,41 +88,6 @@ pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffe
writer.finish().unwrap();
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs_and_operations<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
// [add, add, delete, add, add]
// we can ignore everything that happened before the last delete.
let starting_position =
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
// [add, add, delete]
// if the last operation was a deletion then we simply return the deletion
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
{
return Ok(obkvs[obkvs.len() - 1].clone());
}
let mut buffer = Vec::new();
// (add, add, delete) [add, add]
// in the other case, no deletion will be encountered during the merge
let mut ret =
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current[1..]);
merge_two_obkvs(first, second, &mut buffer);
// we want the result of the merge into our accumulator
std::mem::swap(&mut acc, &mut buffer);
acc
});
ret.insert(0, Operation::Addition as u8);
Ok(Cow::from(ret))
}
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],


@ -13,9 +13,9 @@ pub use grenad_helpers::{
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
serialize_roaring_bitmap, MergeFn,
};
use crate::MAX_WORD_LENGTH;


@ -79,7 +79,6 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
progress: FP,
should_abort: FA,
added_documents: u64,
deleted_documents: u64,
}
#[derive(Default, Debug, Clone)]
@ -123,7 +122,6 @@ where
wtxn,
index,
added_documents: 0,
deleted_documents: 0,
})
}
@ -168,30 +166,6 @@ where
Ok((self, Ok(indexed_documents)))
}
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
// Early return when there is no document to delete
if to_delete.is_empty() {
return Ok((self, Ok(0)));
}
let deleted_documents = self
.transform
.as_mut()
.expect("Invalid document deletion state")
.remove_documents(to_delete, self.wtxn, &self.should_abort)?
as u64;
self.deleted_documents += deleted_documents;
Ok((self, Ok(deleted_documents)))
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
if self.added_documents == 0 {
@ -1905,328 +1879,4 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) = builder.remove_documents(vec![S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":3,"name":"jean","age":25}
"###);
}
#[test]
fn add_update_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 5,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn add_document_and_in_another_transform_update_and_delete_documents() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 3,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":2,"doggo":{"name":"bob","age":20}}
{"id":3,"name":"jean","age":25}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn delete_document_and_then_add_documents_in_the_same_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":2,"doggo":{"name":"jean","age":20}}
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn delete_the_same_document_multiple_time() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn add_document_and_in_another_transform_delete_the_document_then_add_it_again() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"catto":"jorts"}
"###);
}
}


@ -12,9 +12,7 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{
create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
};
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@ -52,12 +50,8 @@ pub struct Transform<'a, 'i> {
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
// Both grenads follow the same format:
// key | value
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
// To increase the cache locality and decrease the heap usage we use compact smartstring.
@ -65,14 +59,6 @@ pub struct Transform<'a, 'i> {
documents_count: usize,
}
/// This enum is specific to the grenad sorter stored in the transform.
/// It's used as the first byte of the grenads and tells you if the document id was an addition or a deletion.
#[repr(u8)]
pub enum Operation {
Addition,
Deletion,
}
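
Concretely, each sorter entry is encoded exactly as the removed comment above describes: a one-byte operation tag followed by the raw obkv. A sketch distilled from read_documents below:

// docid (u32, big-endian) -> [Operation byte | obkv bytes]
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8); // the 1-byte tag
document_sorter_buffer.extend_from_slice(&obkv_buffer); // the document itself
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;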
/// Create a mapping between the field ids found in the document batch and the one that were
/// already present in the index.
///
@ -108,7 +94,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
};
// We initialize the sorter with the user indexing settings.
@ -165,7 +151,9 @@ impl<'a, 'i> Transform<'a, 'i> {
FA: Fn() -> bool + Sync,
{
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
let primary_key = cursor.primary_key().to_string();
@ -173,7 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> {
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
let mut obkv_buffer = Vec::new();
let mut document_sorter_buffer = Vec::new();
let mut documents_count = 0;
let mut docid_buffer: Vec<u8> = Vec::new();
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
@ -225,13 +212,10 @@ impl<'a, 'i> Transform<'a, 'i> {
Entry::Occupied(entry) => *entry.get() as u32,
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a replaced document.
// It'll be deleted later.
// It'll be deleted later. We keep its original docid to insert it in the grenad.
if let Some(docid) = external_documents_ids.get(entry.key()) {
// If it was already in the list of replaced documents it means it was deleted
// by the remove_documents method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
self.replaced_documents_ids.insert(docid);
original_docid = Some(docid);
}
let docid = self
.available_documents_ids
@ -264,46 +248,26 @@ impl<'a, 'i> Transform<'a, 'i> {
skip_insertion = true;
} else {
// we associate the base document with the new key, everything will get merged later.
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(base_obkv);
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter.insert(docid.to_be_bytes(), base_obkv)?;
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
Some(buffer) => {
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
}
}
}
if !skip_insertion {
self.new_documents_ids.insert(docid);
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&obkv_buffer);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?;
match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
Some(flattened_obkv) => {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
None => {
self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
documents_count += 1;
@ -329,73 +293,6 @@ impl<'a, 'i> Transform<'a, 'i> {
Ok(documents_count)
}
/// The counterpart of `read_documents` that removes documents, either from the transform or the database.
/// It can be called before, after, or in between two calls of `read_documents`.
///
/// It needs to update all the internal data structures in the transform; a condensed usage sketch
/// follows the function body below.
/// - If the document is coming from the database -> it's marked as a to_delete document
/// - If the document to remove was inserted by the `read_documents` method before AND was present in the db,
/// it's marked as `to_delete` + added into the grenad to ensure we don't reinsert it.
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
wtxn: &mut heed::RwTxn,
should_abort: FA,
) -> Result<usize>
where
FA: Fn() -> bool + Sync,
{
// there may be duplicates in the documents to remove.
to_remove.sort_unstable();
to_remove.dedup();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mut documents_deleted = 0;
for to_remove in to_remove {
if should_abort() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
// if the document was added in a previous iteration of the transform we mark it as deleted in the sorters.
Entry::Occupied(entry) => {
let doc_id = *entry.get() as u32;
self.original_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
self.flattened_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
// we must NOT update the list of replaced_documents_ids
// Either:
// 1. It's already in it and there is nothing to do
// 2. It wasn't in it because the document was created by a previous batch and since
// we're removing it there is nothing to do.
self.new_documents_ids.remove(doc_id);
entry.remove_entry();
}
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a `to_delete` document.
// It'll be deleted later. We don't need to push anything to the sorters.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
} else {
// if the document is nowhere to be found, there is nothing to do and we must NOT
// increment the count of documents_deleted
continue;
}
}
};
documents_deleted += 1;
}
Ok(documents_deleted)
}
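
A condensed usage sketch of the rules above, mirroring the removed delete_the_same_document_multiple_time test earlier in this diff: ids are deduplicated first, and ids found in neither the db nor the transform are not counted.

// Nothing exists yet, so none of these ids counts as deleted.
let (builder, removed) =
    builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");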
// Flatten a document from the fields ids map contained in self and insert the newly
// created fields. Returns `None` if the document doesn't need to be flattened.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
@ -590,11 +487,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut documents_count = 0;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
// send a callback to show at which step we are
documents_count += 1;
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
@ -626,18 +518,9 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
tempfile::tempfile()?,
);
// Once we have written all the documents into the final sorter, we write the nested documents
// into this writer.
// We get rid of the `Operation` byte and skip the deleted documents as well.
let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
writer.insert(key, val)?;
}
// Once we have written all the documents into the final sorter, we write the documents
// into this writer, extract the file and reset the seek to be able to read it again.
self.flattened_sorter.write_into_stream_writer(&mut writer)?;
let mut flattened_documents = writer.into_inner()?;
flattened_documents.rewind()?;
@ -818,45 +701,3 @@ impl TransformOutput {
.collect())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn merge_obkvs() {
let mut doc_0 = Vec::new();
let mut kv_writer = KvWriter::new(&mut doc_0);
kv_writer.insert(0_u8, [0]).unwrap();
kv_writer.finish().unwrap();
doc_0.insert(0, Operation::Addition as u8);
let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
)
.unwrap();
assert_eq!(*ret, [Operation::Deletion as u8]);
let ret = merge_obkvs_and_operations(
&[],
&[
Cow::from([Operation::Addition as u8, 1].as_slice()),
Cow::from([Operation::Deletion as u8].as_slice()),
Cow::from(doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, doc_0);
}
}