Compare commits


38 Commits

Author SHA1 Message Date
94d8484581 Update Dockerfile
Co-authored-by: Markus Machatschek <markus.machatschek@hey.com>
2023-03-06 14:39:20 +01:00
5333edd1db Update Dockerfile
Co-authored-by: Markus Machatschek <markus.machatschek@hey.com>
2023-02-23 17:39:31 +01:00
bddf3f96e6 Use Debian instead of Alpine in Dockerfile 2023-02-20 19:38:22 +01:00
1e9ac00800 Merge #3505
3505: Csv delimiter r=irevoire a=irevoire

Fixes https://github.com/meilisearch/meilisearch/issues/3442
Closes https://github.com/meilisearch/meilisearch/pull/2803
Specified in https://github.com/meilisearch/specifications/pull/221

This PR is a reimplementation of https://github.com/meilisearch/meilisearch/pull/2803, on the new engine. Thanks for your idea and initial PR @MixusMinimax; sorry I couldn't update/merge your PR. Way too many changes happened on the engine in the meantime.

**Attention to reviewer**: I had to update deserr to implement support for deserializing `char`s

-------

This PR introduces four new error messages:
- Invalid value in parameter csvDelimiter: expected a string of one character, but found an empty string
- Invalid value in parameter csvDelimiter: expected a string of one character, but found the following string of 5 characters: doggo
- csv delimiter must be an ascii character. Found: 🍰 
- The Content-Type application/json does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type text/csv.

And one new error code:
- `invalid_index_csv_delimiter`

The `invalid_content_type` error code is now also used when we encounter the `csvDelimiter` query parameter with a non-csv content type.
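
As a rough illustration, the rules these messages encode could look like the following sketch (hedged: the real implementation is deserr-based, so the function name and exact wording here are illustrative only):

```rust
// Illustrative sketch of the csvDelimiter validation rules implied by the
// error messages above; not the PR's actual deserr-based implementation.
fn validate_csv_delimiter(param: &str, content_type: &str) -> Result<u8, String> {
    // The delimiter only makes sense for CSV payloads.
    if content_type != "text/csv" {
        return Err(format!(
            "The Content-Type {content_type} does not support the use of a csv delimiter. \
             The csv delimiter can only be used with the Content-Type text/csv."
        ));
    }
    let mut chars = param.chars();
    let delimiter = match (chars.next(), chars.next()) {
        (None, _) => {
            return Err("expected a string of one character, but found an empty string".into())
        }
        (Some(c), None) => c,
        (Some(_), Some(_)) => {
            return Err(format!(
                "expected a string of one character, but found the following string of {} characters: {param}",
                param.chars().count()
            ))
        }
    };
    if !delimiter.is_ascii() {
        return Err(format!("csv delimiter must be an ascii character. Found: {delimiter}"));
    }
    Ok(delimiter as u8)
}

fn main() {
    assert_eq!(validate_csv_delimiter(";", "text/csv"), Ok(b';'));
    assert!(validate_csv_delimiter("", "text/csv").is_err());
    assert!(validate_csv_delimiter("doggo", "text/csv").is_err());
    assert!(validate_csv_delimiter("🍰", "text/csv").is_err());
    assert!(validate_csv_delimiter(";", "application/json").is_err());
}
```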

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 17:01:36 +00:00
b08a49a16e Merge #3319 #3470
3319: Transparently resize indexes on MaxDatabaseSizeReached errors r=Kerollmops a=dureuill

# Pull Request

## Related issue
Related to https://github.com/meilisearch/meilisearch/discussions/3280, depends on https://github.com/meilisearch/milli/pull/760

## What does this PR do?

### User standpoint

- Meilisearch no longer fails tasks that encounter the `milli::UserError(MaxDatabaseSizeReached)` error.
- Instead, these tasks are retried after increasing the maximum size allocated to the index where the failure occurred.

### Implementation standpoint

- Add `Batch::index_uid` to get the `index_uid` of a batch of tasks, if there is one
- `IndexMapper::create_or_open_index` now takes an additional `size` argument that allows (re)opening indexes with a size different from the base `IndexScheduler::index_size` field
- `IndexScheduler::tick` now returns a `Result<TickOutcome>` instead of a `Result<usize>`. This gives more explicit control over what the behavior of the next tick should be.
- Add `IndexStatus::BeingResized`, which contains a handle that a thread can use to wait for the resize operation to complete and the index to become available again.
- Add `IndexMapper::resize_index` to increase the size of an index.
- In `IndexScheduler::tick`, intercept task batches that failed due to `MaxDatabaseSizeReached`, resize the index that caused the error, and then request a new tick that will eventually handle the still-enqueued tasks (see the sketch below).
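
The retry loop can be pictured with this minimal, self-contained sketch (all names here, including the `TickOutcome` variants, are stand-ins rather than the scheduler's actual types):

```rust
// Toy model of "resize on MaxDatabaseSizeReached, then tick again".
enum BatchError {
    MaxDatabaseSizeReached,
}

enum TickOutcome {
    TickAgain,     // e.g. after a resize: retry the still-enqueued tasks
    WaitForSignal, // nothing left to do until a new task arrives
}

struct Index {
    map_size: u64,
}

impl Index {
    /// Reopen the index with twice its current map size (the PR's growth factor).
    fn resize(&mut self) {
        self.map_size *= 2;
    }
}

fn process_batch(index: &Index, batch_size: u64) -> Result<(), BatchError> {
    if batch_size > index.map_size {
        Err(BatchError::MaxDatabaseSizeReached)
    } else {
        Ok(())
    }
}

fn tick(index: &mut Index, batch_size: u64) -> TickOutcome {
    match process_batch(index, batch_size) {
        Ok(()) => TickOutcome::WaitForSignal,
        Err(BatchError::MaxDatabaseSizeReached) => {
            index.resize();        // transparently grow the index...
            TickOutcome::TickAgain // ...and let the next tick retry the batch
        }
    }
}

fn main() {
    let mut index = Index { map_size: 1 };
    loop {
        match tick(&mut index, 5) {
            TickOutcome::TickAgain => continue,
            TickOutcome::WaitForSignal => break,
        }
    }
    assert_eq!(index.map_size, 8); // resized 1 -> 2 -> 4 -> 8; the tasks never failed
}
```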

## Testing the PR

The following diff can be applied to this branch to make testing the PR easier:

<details>


```diff
diff --git a/index-scheduler/src/index_mapper.rs b/index-scheduler/src/index_mapper.rs
index 553ab45a..022b2f00 100644
--- a/index-scheduler/src/index_mapper.rs
+++ b/index-scheduler/src/index_mapper.rs
@@ -228,13 +228,15 @@ impl IndexMapper {
 
         drop(lock);
 
+        std::thread::sleep_ms(2000);
+
         let current_size = index.map_size()?;
         let closing_event = index.prepare_for_closing();
-        log::info!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resizing index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         closing_event.wait();
 
-        log::info!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
+        log::error!("Resized index {} from {} to {} bytes", name, current_size, current_size * 2);
 
         let index_path = self.base_path.join(uuid.to_string());
         let index = self.create_or_open_index(&index_path, None, 2 * current_size)?;
@@ -268,8 +270,10 @@ impl IndexMapper {
             match index {
                 Some(Available(index)) => break index,
                 Some(BeingResized(ref resize_operation)) => {
+                    log::error!("waiting for resize end");
                     // Deadlock: no lock taken while doing this operation.
                     resize_operation.wait();
+                    log::error!("trying our luck again!");
                     continue;
                 }
                 Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 11b17d05..242dc095 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -908,6 +908,7 @@ impl IndexScheduler {
     ///
     /// Returns the number of processed tasks.
     fn tick(&self) -> Result<TickOutcome> {
+        log::error!("ticking!");
         #[cfg(test)]
         {
             *self.run_loop_iteration.write().unwrap() += 1;
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 050c825a..63f312f6 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -25,7 +25,7 @@ fn setup(opt: &Opt) -> anyhow::Result<()> {
 
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
-    let (opt, config_read_from) = Opt::try_build()?;
+    let (mut opt, config_read_from) = Opt::try_build()?;
 
     setup(&opt)?;
 
@@ -56,6 +56,8 @@ We generated a secure master key for you (you can safely copy this token):
         _ => (),
     }
 
+    opt.max_index_size = byte_unit::Byte::from_str("1MB").unwrap();
+
     let (index_scheduler, auth_controller) = setup_meilisearch(&opt)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
```
</details>

Mainly, these debug changes do the following:

- Set the default index size to 1MB so that index resizes are initially frequent
- Turn some logs from info to error so that they can be displayed with `--log-level ERROR` (hiding the other info logs)
- Add a long sleep between the beginning and the end of the resize so that we can observe the `BeingResized` index status (otherwise it would never come up in my tests)

## Open questions

- Is the growth factor of x2 the correct solution? For a `Vec` in memory it makes sense, but here we're manipulating quantities that are potentially on the order of 500GiB. For bigger indexes it may make more sense to add at most e.g. 100GiB on each resize operation, avoiding big steps like 500GiB -> 1TiB (a capped policy is sketched below).
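
A possible capped policy (the numbers are assumptions, not part of this PR) could look like:

```rust
// Keep doubling small indexes, but never grow by more than 100 GiB in one step.
const MAX_RESIZE_STEP: u64 = 100 * 1024 * 1024 * 1024; // 100 GiB, an assumed cap

fn next_map_size(current: u64) -> u64 {
    current.saturating_mul(2).min(current.saturating_add(MAX_RESIZE_STEP))
}

fn main() {
    const GIB: u64 = 1024 * 1024 * 1024;
    assert_eq!(next_map_size(GIB), 2 * GIB);         // small index: x2
    assert_eq!(next_map_size(500 * GIB), 600 * GIB); // big index: +100 GiB, not 1 TiB
}
```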

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


3470: Autobatch addition and deletion r=irevoire a=irevoire

This PR adds the capability for Meilisearch to batch document additions and deletions together (a short sketch of the semantics follows the checklist below).

Fix https://github.com/meilisearch/meilisearch/issues/3440

--------------

Things to check before merging:

- [x] What happens if we delete the same documents multiple times? -> added a test
- [x] What if a `documentDeletion` gets batched with a `documentAddition` but the index doesn't exist yet? They should not be batched together
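
The intended semantics can be pictured with this toy sketch (assumed names, not the engine's transform): operations in a batch apply in task order, so the last operation touching a document id wins, and deleting the same id twice is a no-op.

```rust
use std::collections::HashMap;

// Toy document operations, standing in for the autobatched tasks.
enum Operation {
    Addition(Vec<(String, String)>), // (document id, payload)
    Deletion(Vec<String>),           // document ids to remove
}

// Apply a batch in task order over an in-memory "index".
fn apply(batch: &[Operation]) -> HashMap<String, String> {
    let mut docs = HashMap::new();
    for op in batch {
        match op {
            Operation::Addition(documents) => {
                for (id, payload) in documents {
                    docs.insert(id.clone(), payload.clone());
                }
            }
            Operation::Deletion(ids) => {
                for id in ids {
                    docs.remove(id); // deleting the same id twice is a no-op
                }
            }
        }
    }
    docs
}

fn main() {
    let batch = vec![
        Operation::Addition(vec![("1".into(), "doggo".into()), ("2".into(), "catto".into())]),
        Operation::Deletion(vec!["1".into(), "1".into()]), // duplicate deletion
    ];
    let docs = apply(&batch);
    assert_eq!(docs.len(), 1);
    assert!(docs.contains_key("2"));
}
```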

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 15:00:19 +00:00
a8f6f108e0 Merge #3515
3515: Consider null as a valid geo field r=irevoire a=irevoire

Fix #3497
Associated spec: https://github.com/meilisearch/specifications/pull/222
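
A minimal sketch of the accepted shapes after this change (an assumed helper using `serde_json`, not Meilisearch's actual validation code):

```rust
use serde_json::{json, Value};

// `null` is now treated as "no geo point" rather than an invalid geo field.
fn geo_field_is_valid(geo: &Value) -> bool {
    match geo {
        Value::Null => true, // the new case introduced by this PR
        Value::Object(obj) => obj.contains_key("lat") && obj.contains_key("lng"),
        _ => false,
    }
}

fn main() {
    assert!(geo_field_is_valid(&Value::Null));
    assert!(geo_field_is_valid(&json!({ "lat": 12.0, "lng": 84.0 })));
    assert!(!geo_field_is_valid(&json!("not a geo point")));
}
```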

Co-authored-by: Tamo <tamo@meilisearch.com>
2023-02-20 14:12:55 +00:00
1479050f7a apply review suggestions 2023-02-20 14:53:37 +01:00
97b8c32e22 Merge #3514
3514: Bump version of mini-dashboard to v0.2.6 r=irevoire a=bidoubiwa

Update the version of the mini-dashboard to v0.2.6.

See [release notes](https://github.com/meilisearch/mini-dashboard/releases/tag/v0.2.6).

Co-authored-by: Charlotte Vermandel <charlottevermandel@gmail.com>
2023-02-20 13:21:00 +00:00
35f6c624bc Make sure we don't leave the in memory hashmap in an inconsistent state 2023-02-20 13:55:32 +01:00
1116788475 Resize indexes when they're full 2023-02-20 13:55:32 +01:00
951a5b5832 Add IndexMapper::resize_index fn 2023-02-20 13:55:32 +01:00
1c670d7fa0 Add IndexStatus::BeingResized 2023-02-20 13:55:32 +01:00
6cc3797aa1 IndexScheduler::tick returns a TickOutcome 2023-02-20 13:55:31 +01:00
faf1e17a27 create_or_open_index takes a map_size argument 2023-02-20 13:55:31 +01:00
4c519c2ab3 Add Batch::index_uid 2023-02-20 13:55:31 +01:00
dd120e0e16 Bump version of mini-dashboard to v0.2.6 2023-02-20 13:45:57 +01:00
18796d6e6a Consider null as a valid geo object 2023-02-20 13:45:51 +01:00
895ab2906c apply review suggestions 2023-02-16 18:42:47 +01:00
e79f6f87f6 make cargo fmt&clippy happy 2023-02-16 18:00:40 +01:00
5367d8f05a add two tests on the indexing of csvs 2023-02-16 17:37:11 +01:00
52686da028 test various error on the document ressource 2023-02-16 17:37:10 +01:00
8c074f5028 implements the csv delimiter without tests
Co-authored-by: Maxi Barmetler <maxi.barmetler@gmail.com>
2023-02-16 17:35:36 +01:00
74dcfe9676 Fix a bug when you update a document that was already present in the db, deleted and then inserted again in the same transform 2023-02-14 19:09:40 +01:00
1b1703a609 make a small optimization to merge obkvs a little bit faster 2023-02-14 18:32:41 +01:00
fb5e4957a6 fix and test the early exit in case a grenad ends with a deletion 2023-02-14 18:23:57 +01:00
8de3c9f737 Update milli/src/update/index_documents/transform.rs
Co-authored-by: Clément Renault <clement@meilisearch.com>
2023-02-14 17:57:14 +01:00
43a19d0709 document the operation enum + the grenads 2023-02-14 17:55:26 +01:00
29d14bed90 get rids of the let/else syntax 2023-02-14 17:45:46 +01:00
746b31c1ce makes clippy happy 2023-02-09 12:23:01 +01:00
eaad84bd1d fix the test to handle the document deletion correctly 2023-02-09 11:29:13 +01:00
ea9ac46f28 stop autobatching the deletion without the index creation right with the addition 2023-02-08 21:24:27 +01:00
93db755d57 add a test to ensure we handle correctly a deletion of multiple time the same document 2023-02-08 21:03:34 +01:00
93f130a400 fix all warnings 2023-02-08 20:57:35 +01:00
860c993ef7 Handle the autobatching of deletion and addition in the scheduler 2023-02-08 20:53:19 +01:00
67dda0678f cleanup the autobatcher a little bit 2023-02-08 18:10:59 +01:00
2db6347686 update the autobatcher to batch the addition and deletion together 2023-02-08 18:07:59 +01:00
421a9cf05e provide a new method on the transform to remove documents 2023-02-08 16:06:09 +01:00
8f64fba1ce rewrite the current transform to handle a new byte specifying the kind of operation it's merging 2023-02-08 12:53:38 +01:00
54 changed files with 2383 additions and 2414 deletions

Cargo.lock (generated)

@@ -1113,9 +1113,9 @@ dependencies = [
[[package]]
name = "deserr"
version = "0.4.1"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6eee2844f21cf7fb5693aae1fb8f1658127acfdb2fc072167d68a9152584ae64"
checksum = "c71c14985c842bf1e520b1ebcd22daff6aeece32f510e11f063cecf9b308c04b"
dependencies = [
"actix-http",
"actix-utils",
@@ -1130,9 +1130,9 @@ dependencies = [
[[package]]
name = "deserr-internal"
version = "0.4.1"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c27246f8ca9eeba9dd70d614b664dc43b529251ed7bd9e633131010d340da4b9"
checksum = "cae1c51b191528c9e4e5d6cff671de94f61fcda1c206cc891251e0cf438c941a"
dependencies = [
"convert_case 0.5.0",
"proc-macro2",
@@ -1921,7 +1921,6 @@ dependencies = [
"insta",
"log",
"meili-snap",
"meilisearch-auth",
"meilisearch-types",
"nelson",
"page_size 0.5.0",

Dockerfile

@@ -1,7 +1,5 @@
# Compile
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
FROM rust:bullseye AS compiler
WORKDIR /meilisearch
@@ -13,20 +11,22 @@ ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
arch="$(dpkg --print-architecture)"; \
if [ "$arch" = "arm64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
# Run
FROM alpine:3.16
FROM debian:11.6
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
RUN set -ex; \
apt-get update -q; \
apt-get install -q -y --no-install-recommends tini; \
rm -rf /var/lib/apt/lists/*
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.

index-scheduler/Cargo.toml

@@ -19,7 +19,6 @@ dump = { path = "../dump" }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
page_size = "0.5.0"
roaring = { version = "0.10.0", features = ["serde"] }

index-scheduler/src/autobatcher.rs

@@ -88,11 +88,11 @@ pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentImport {
DocumentOperation {
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
import_ids: Vec<TaskId>,
operation_ids: Vec<TaskId>,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
@@ -102,12 +102,12 @@ allow_index_creation: bool,
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
SettingsAndDocumentImport {
SettingsAndDocumentOperation {
settings_ids: Vec<TaskId>,
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
import_ids: Vec<TaskId>,
operation_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
@@ -131,9 +131,9 @@ impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentImport { allow_index_creation, .. }
BatchKind::DocumentOperation { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
@@ -141,8 +141,8 @@ impl BatchKind {
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentImport { primary_key, .. }
| BatchKind::SettingsAndDocumentImport { primary_key, .. } => {
BatchKind::DocumentOperation { primary_key, .. }
| BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
Some(primary_key.as_deref())
}
_ => None,
@@ -173,22 +173,22 @@ impl BatchKind {
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
{
(
Continue(BatchKind::DocumentImport {
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key: pk,
import_ids: vec![task_id],
operation_ids: vec![task_id],
}),
allow_index_creation,
)
}
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
K::DocumentImport { method, allow_index_creation, primary_key } => (
Break(BatchKind::DocumentImport {
Break(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
import_ids: vec![task_id],
operation_ids: vec![task_id],
}),
allow_index_creation,
),
@@ -249,7 +249,7 @@ impl BatchKind {
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids }
| BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids }
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
@@ -258,7 +258,7 @@
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
| BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
| BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
K::IndexDeletion,
) => {
ids.push(id);
@@ -278,63 +278,108 @@ impl BatchKind {
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids },
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
K::DocumentClear,
) => {
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
operation_ids.push(id);
Continue(BatchKind::DocumentClear { ids: operation_ids })
}
// we can autobatch the same kind of document additions / updates
(
BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut import_ids },
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
) => {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method: ReplaceDocuments,
allow_index_creation,
import_ids,
operation_ids,
primary_key: pk,
})
}
(
BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
) => {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method: UpdateDocuments,
allow_index_creation,
primary_key: pk,
import_ids,
operation_ids,
})
}
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids,
})
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
this @ BatchKind::DocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
this @ BatchKind::DocumentOperation { .. },
K::DocumentImport { .. },
) => Break(this),
(
BatchKind::DocumentImport { method, allow_index_creation, primary_key, import_ids },
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
K::Settings { .. },
) => Continue(BatchKind::SettingsAndDocumentImport {
) => Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids: vec![id],
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
}),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
(this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this),
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
(
this @ BatchKind::DocumentDeletion { .. },
K::DocumentImport { .. }
) => {
Break(this)
}
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids })
@@ -403,60 +448,60 @@ impl BatchKind {
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation, primary_key: _ },
BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
K::DocumentClear,
) => {
other.push(id);
operation_ids.push(id);
Continue(BatchKind::ClearAndSettings {
settings_ids,
other,
other: operation_ids,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation, primary_key: _},
BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
) => {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method: ReplaceDocuments,
allow_index_creation,
primary_key: pk2,
import_ids,
operation_ids,
})
}
(
BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
) => {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method: UpdateDocuments,
allow_index_creation,
primary_key: pk2,
import_ids,
operation_ids,
})
}
// But we can't batch a settings and a doc op with another doc op
// this MUST be AFTER the two previous branch
(
this @ BatchKind::SettingsAndDocumentImport { .. },
this @ BatchKind::SettingsAndDocumentOperation { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation,primary_key, import_ids },
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
Continue(BatchKind::SettingsAndDocumentOperation {
settings_ids,
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
})
}
(
@@ -588,29 +633,29 @@ mod tests {
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
@@ -628,56 +673,83 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
// We can autobatch document addition with document deletion
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// addition, updates and deletion can't batch together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
#[test]
fn document_addition_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
// We ensure this kind of batch doesn't batch with forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
}
#[test]
@@ -789,67 +861,73 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whith what
// follows because we first need to process the erronous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
}
#[test]
fn allowed_and_disallowed_index_creation() {
// `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
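The snapshots above boil down to one compatibility rule. As a minimal, self-contained sketch (the `can_absorb` helper and its arguments are invented names, not the autobatcher's real API):

```rust
// Hypothetical model of the rule these snapshots exercise: when the index does
// not exist yet, a batch keeps the `allow_index_creation` flag of its first
// task and refuses to absorb tasks with the opposite flag.
fn can_absorb(index_exists: bool, batch_allows_creation: bool, task_allows_creation: bool) -> bool {
    index_exists || batch_allows_creation == task_allows_creation
}

fn main() {
    assert!(can_absorb(true, false, true)); // existing index: mixed flags still batch
    assert!(!can_absorb(false, false, true)); // missing index: flags must match
    assert!(can_absorb(false, true, true)); // missing index, both allow creation
}
```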
#[test]
fn autobatch_primary_key() {
// ==> If I have a pk
// With a single update
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
// With multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
}
}


@@ -28,8 +28,7 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@@ -86,15 +85,21 @@ pub(crate) enum Batch {
},
}
#[derive(Debug)]
pub(crate) enum DocumentOperation {
Add(Uuid),
Delete(Vec<String>),
}
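To make the shape of this enum concrete, here is a hedged, self-contained illustration; the `main` body and sample ids are invented for the example, and only the enum mirrors the definition above:

```rust
use uuid::Uuid; // assumes the `uuid` crate with the `v4` feature enabled

// Additions point at an update file by UUID; deletions carry the external
// document ids to remove.
enum DocumentOperation {
    Add(Uuid),
    Delete(Vec<String>),
}

fn main() {
    // A mixed batch: one addition followed by one deletion, replayed later in
    // registration order against a single index.
    let operations = vec![
        DocumentOperation::Add(Uuid::new_v4()),
        DocumentOperation::Delete(vec!["1".into(), "2".into()]),
    ];
    assert_eq!(operations.len(), 2);
}
```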
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
pub(crate) enum IndexOperation {
DocumentImport {
DocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
content_files: Vec<Uuid>,
operations: Vec<DocumentOperation>,
tasks: Vec<Task>,
},
DocumentDeletion {
@@ -121,13 +126,13 @@ pub(crate) enum IndexOperation {
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
SettingsAndDocumentImport {
SettingsAndDocumentOperation {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
content_files: Vec<Uuid>,
operations: Vec<DocumentOperation>,
document_import_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
@@ -149,13 +154,13 @@ impl Batch {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentImport { tasks, .. }
IndexOperation::DocumentOperation { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::SettingsAndDocumentImport {
IndexOperation::SettingsAndDocumentOperation {
document_import_tasks: tasks,
settings_tasks: other,
..
@@ -169,17 +174,33 @@ impl Batch {
Batch::IndexSwap { task } => vec![task.uid],
}
}
/// Return the index UID associated with this batch, if any.
pub fn index_uid(&self) -> Option<&str> {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentImport { index_uid, .. }
IndexOperation::DocumentOperation { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
| IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid,
| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
}
}
}
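Both accessors follow the same dispatch pattern. A toy, self-contained sketch (the two-variant `Batch` below is a stand-in, not the real enum):

```rust
// Batches that target a single index expose its uid, so the scheduler can
// later resize exactly that index when a write fails.
enum Batch {
    Dump,
    IndexOperation { index_uid: String },
}

impl Batch {
    fn index_uid(&self) -> Option<&str> {
        match self {
            Batch::Dump => None,
            Batch::IndexOperation { index_uid } => Some(index_uid),
        }
    }
}

fn main() {
    let batch = Batch::IndexOperation { index_uid: "movies".into() };
    assert_eq!(batch.index_uid(), Some("movies"));
    assert_eq!(Batch::Dump.index_uid(), None); // batches with no single target index
}
```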
@@ -206,17 +227,22 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentImport { method, import_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, import_ids)?;
let primary_key = match &tasks[0].kind {
KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => {
primary_key.clone()
}
_ => unreachable!(),
};
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
// we want to stop on the first document addition
Some(primary_key.clone())
}
KindWithContent::DocumentDeletion { .. } => None,
_ => unreachable!(),
})
.flatten();
let mut documents_counts = Vec::new();
let mut content_files = Vec::new();
let mut operations = Vec::new();
for task in tasks.iter() {
match task.kind {
@@ -226,19 +252,23 @@ impl IndexScheduler {
..
} => {
documents_counts.push(documents_count);
content_files.push(content_file);
operations.push(DocumentOperation::Add(content_file));
}
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents_counts.push(documents_ids.len() as u64);
operations.push(DocumentOperation::Delete(documents_ids.clone()));
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentImport {
op: IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
tasks,
},
must_create_index,
@@ -322,12 +352,12 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::SettingsAndDocumentImport {
BatchKind::SettingsAndDocumentOperation {
settings_ids,
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
} => {
let settings = self.create_next_batch_index(
rtxn,
@@ -339,11 +369,11 @@ impl IndexScheduler {
let document_import = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::DocumentImport {
BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
import_ids,
operation_ids,
},
must_create_index,
)?;
@@ -352,10 +382,10 @@ impl IndexScheduler {
(
Some(Batch::IndexOperation {
op:
IndexOperation::DocumentImport {
IndexOperation::DocumentOperation {
primary_key,
documents_counts,
content_files,
operations,
tasks: document_import_tasks,
..
},
@@ -366,12 +396,12 @@ impl IndexScheduler {
..
}),
) => Ok(Some(Batch::IndexOperation {
op: IndexOperation::SettingsAndDocumentImport {
op: IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
document_import_tasks,
settings,
settings_tasks,
@@ -987,12 +1017,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentImport {
IndexOperation::DocumentOperation {
index_uid: _,
primary_key,
method,
documents_counts,
content_files,
documents_counts: _,
operations,
mut tasks,
} => {
let mut primary_key_has_been_set = false;
@@ -1037,26 +1067,82 @@ impl IndexScheduler {
|| must_stop_processing.get(),
)?;
let mut results = Vec::new();
for content_uuid in content_files.into_iter() {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let user_result = match user_result {
Ok(count) => Ok(DocumentAdditionResult {
indexed_documents: count,
number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details.
}),
Err(e) => Err(milli::Error::from(e)),
};
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
..
}) = task.details
{
received_documents
} else {
// In the case of a `documentAdditionOrUpdate` the details MUST be set
unreachable!();
};
results.push(user_result);
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
{
provided_ids
} else {
// In the case of a `documentDeletion` the details MUST be set
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
}
if results.iter().any(|res| res.is_ok()) {
if !tasks.iter().all(|res| res.error.is_some()) {
let addition = builder.execute()?;
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
@@ -1071,29 +1157,6 @@ impl IndexScheduler {
)?;
}
for (task, (ret, count)) in
tasks.iter_mut().zip(results.into_iter().zip(documents_counts))
{
match ret {
Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: number_of_documents,
indexed_documents: Some(indexed_documents),
});
}
Err(error) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: count,
// if there was an error we indexed 0 documents.
indexed_documents: Some(0),
});
task.error = Some(error.into())
}
}
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
@@ -1136,12 +1199,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::SettingsAndDocumentImport {
IndexOperation::SettingsAndDocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
document_import_tasks,
settings,
settings_tasks,
@@ -1159,12 +1222,12 @@ impl IndexScheduler {
let mut import_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentImport {
IndexOperation::DocumentOperation {
index_uid,
primary_key,
method,
documents_counts,
content_files,
operations,
tasks: document_import_tasks,
},
)?;


@@ -9,10 +9,11 @@ use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use synchronoise::SignalEvent;
use time::OffsetDateTime;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted};
use self::IndexStatus::{Available, BeingDeleted, BeingResized};
use crate::uuid_codec::UuidCodec;
use crate::{clamp_to_page_size, Error, Result};
@@ -45,6 +46,8 @@ pub struct IndexMapper {
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// Temporarily do not insert the index in the index map as it is currently being resized.
BeingResized(Arc<SignalEvent>),
/// You can use the index without worrying about anything.
Available(Index),
}
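The `BeingResized` handshake can be sketched in isolation. This minimal example assumes only the `synchronoise` crate already imported above; the thread bodies are placeholders:

```rust
use std::sync::Arc;
use synchronoise::SignalEvent;

fn main() {
    // Manual-reset event: once signaled, every current and future waiter wakes.
    let resize_done = Arc::new(SignalEvent::manual(false));

    let waiter = {
        let resize_done = Arc::clone(&resize_done);
        std::thread::spawn(move || {
            // A reader that found BeingResized parks here, without holding the map lock.
            resize_done.wait();
            // ...then retries its lookup in the index map...
        })
    };

    // ...the resizer closes the environment and reopens it with a larger map_size...
    resize_done.signal(); // wake all waiters at once
    waiter.join().unwrap();
}
```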
@@ -71,9 +74,10 @@ impl IndexMapper {
&self,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(self.index_size));
options.map_size(clamp_to_page_size(map_size));
options.max_readers(1024);
if let Some((created, updated)) = date {
@@ -102,14 +106,15 @@ impl IndexMapper {
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let index = self.create_or_open_index(&index_path, date)?;
let index = self.create_or_open_index(&index_path, date, self.index_size)?;
wtxn.commit()?;
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
if let Some(BeingDeleted) =
self.index_map.write().unwrap().insert(uuid, Available(index.clone()))
if self.index_map.write().unwrap().insert(uuid, Available(index.clone())).is_some()
{
panic!("Uuid v4 conflict.");
panic!("Uuid v4 conflict: index with UUID {uuid} already exists.");
}
Ok(index)
@@ -131,13 +136,23 @@ impl IndexMapper {
wtxn.commit()?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
let closing_event = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => Some(index.prepare_for_closing()),
_ => None,
};
let closing_event = loop {
let mut lock = self.index_map.write().unwrap();
let resize_operation = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => break Some(index.prepare_for_closing()),
// The target index is in the middle of a resize operation.
// Wait for this operation to complete, then try again.
Some(BeingResized(resize_operation)) => resize_operation.clone(),
// The index is already being deleted or doesn't exist.
// It's OK to remove it from the map again.
_ => break None,
};
drop(lock);
// Avoiding deadlocks: we need to drop the lock before waiting for the end of the resize, which
// will involve operations on the very map we're locking.
drop(lock);
resize_operation.wait();
};
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
@@ -171,6 +186,87 @@ impl IndexMapper {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
/// Resizes the maximum size of the specified index to double its current maximum size.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized, or cannot be found in the
/// in-memory hash map.
pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
// fixme: factor to a function?
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
// signal that will be sent when the resize operation completes
let resize_operation = Arc::new(SignalEvent::manual(false));
let index = match lock.insert(uuid, BeingResized(resize_operation)) {
Some(Available(index)) => index,
Some(previous_status) => {
lock.insert(uuid, previous_status);
panic!(
"Attempting to resize index {name} that is already being resized or deleted."
)
}
None => {
panic!("Could not find the status of index {name} in the in-memory index mapper.")
}
};
drop(lock);
let resize_succeeded = (move || {
let current_size = index.map_size()?;
let new_size = current_size * 2;
let closing_event = index.prepare_for_closing();
log::debug!("Waiting for index {name} to close");
if !closing_event.wait_timeout(std::time::Duration::from_secs(600)) {
// fail after 10 minutes waiting
panic!("Could not resize index {name} (unable to close it)");
}
log::info!("Resized index {name} from {current_size} to {new_size} bytes");
let index_path = self.base_path.join(uuid.to_string());
let index = self.create_or_open_index(&index_path, None, new_size)?;
Ok(index)
})();
// Put the map back to a consistent state.
// Even if there was an error we don't want to leave the map in an inconsistent state as it would cause
// deadlocks.
let mut lock = self.index_map.write().unwrap();
let (resize_operation, resize_succeeded) = match resize_succeeded {
Ok(index) => {
// insert the resized index
let Some(BeingResized(resize_operation)) = lock.insert(uuid, Available(index)) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Ok(()))
}
Err(error) => {
// there was an error, not much we can do... delete the index from the in-memory map to prevent future errors
let Some(BeingResized(resize_operation)) = lock.remove(&uuid) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Err(error))
}
};
// drop the lock before signaling completion so that other threads don't immediately await on the lock after waking up.
drop(lock);
resize_operation.signal();
resize_succeeded
}
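Stripped of all the locking, the growth policy is a single doubling step, roughly as follows (the function name is illustrative):

```rust
// Each resize gives the environment twice its previous map size, so repeated
// MaxDatabaseSizeReached failures grow an index geometrically.
fn next_map_size(current: usize) -> usize {
    current.checked_mul(2).expect("map size overflow")
}

fn main() {
    assert_eq!(next_map_size(100 << 20), 200 << 20); // 100 MiB -> 200 MiB
}
```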
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
@@ -179,31 +275,47 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = match index {
Some(Available(index)) => index,
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index = loop {
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = self.create_or_open_index(&index_path, None)?;
entry.insert(Available(index.clone()));
index
match index {
Some(Available(index)) => break index,
Some(BeingResized(ref resize_operation)) => {
// Avoiding deadlocks: no lock taken while doing this operation.
resize_operation.wait();
continue;
}
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (e.g. if two searches happen
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index =
self.create_or_open_index(&index_path, None, self.index_size)?;
entry.insert(Available(index.clone()));
break index;
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => break index.clone(),
BeingResized(resize_operation) => {
// Avoiding the deadlock: we drop the lock before waiting
let resize_operation = resize_operation.clone();
drop(index_map);
resize_operation.wait();
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => index.clone(),
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
};
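The double-checked pattern in this lookup can be isolated as follows; `HashMap<u32, String>` stands in for the real uuid-to-status map, and `get_or_open` is a made-up name:

```rust
use std::collections::HashMap;
use std::sync::RwLock;

// Check under the read lock first; after upgrading to the write lock, re-check
// through entry(), since another thread may have opened the index in between.
fn get_or_open(map: &RwLock<HashMap<u32, String>>, key: u32) -> String {
    if let Some(v) = map.read().unwrap().get(&key) {
        return v.clone();
    }
    map.write()
        .unwrap()
        .entry(key)
        .or_insert_with(|| format!("opened-index-{key}")) // stand-in for create_or_open_index
        .clone()
}

fn main() {
    let map = RwLock::new(HashMap::new());
    assert_eq!(get_or_open(&map, 7), "opened-index-7");
    assert_eq!(get_or_open(&map, 7), "opened-index-7"); // second call takes the read path
}
```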


@@ -43,6 +43,7 @@ use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli;
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
@@ -422,12 +423,12 @@ impl IndexScheduler {
#[cfg(test)]
run.breakpoint(Breakpoint::Init);
loop {
run.wake_up.wait();
run.wake_up.wait();
loop {
match run.tick() {
Ok(0) => (),
Ok(_) => run.wake_up.signal(),
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
@@ -440,7 +441,6 @@ impl IndexScheduler {
) {
std::thread::sleep(Duration::from_secs(1));
}
run.wake_up.signal();
}
}
}
@@ -630,13 +630,13 @@ impl IndexScheduler {
&self,
rtxn: &RoTxn,
query: &Query,
filters: &meilisearch_auth::AuthFilter,
authorized_indexes: &Option<Vec<IndexUidPattern>>,
) -> Result<RoaringBitmap> {
let mut tasks = self.get_task_ids(rtxn, query)?;
// If the query contains a list of index uids or there is a finite list of authorized indexes,
// then we must exclude all the kinds that aren't associated with one and only one index.
if query.index_uids.is_some() || !filters.all_indexes_authorized() {
if query.index_uids.is_some() || authorized_indexes.is_some() {
for kind in enum_iterator::all::<Kind>().filter(|kind| !kind.related_to_one_index()) {
tasks -= self.get_kind(rtxn, kind)?;
}
@@ -644,11 +644,11 @@ impl IndexScheduler {
// Any task that is internally associated with a non-authorized index
// must be discarded.
if !filters.all_indexes_authorized() {
if let Some(authorized_indexes) = authorized_indexes {
let all_indexes_iter = self.index_tasks.iter(rtxn)?;
for result in all_indexes_iter {
let (index, index_tasks) = result?;
if !filters.is_index_authorized(index) {
if !authorized_indexes.iter().any(|p| p.matches_str(index)) {
tasks -= index_tasks;
}
}
@@ -668,11 +668,12 @@ impl IndexScheduler {
pub fn get_tasks_from_authorized_indexes(
&self,
query: Query,
filters: &meilisearch_auth::AuthFilter,
authorized_indexes: Option<Vec<IndexUidPattern>>,
) -> Result<Vec<Task>> {
let rtxn = self.env.read_txn()?;
let tasks = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?;
let tasks =
self.get_task_ids_from_authorized_indexes(&rtxn, &query, &authorized_indexes)?;
let tasks = self.get_existing_tasks(
&rtxn,
@@ -763,8 +764,8 @@ impl IndexScheduler {
Ok(task)
}
/// Register a new task comming from a dump in the scheduler.
/// By takinig a mutable ref we're pretty sure no one will ever import a dump while actix is running.
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
&mut self,
task: TaskDump,
@@ -925,7 +926,7 @@ impl IndexScheduler {
/// 5. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
fn tick(&self) -> Result<usize> {
fn tick(&self) -> Result<TickOutcome> {
#[cfg(test)]
{
*self.run_loop_iteration.write().unwrap() += 1;
@@ -936,8 +937,9 @@ impl IndexScheduler {
let batch =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(0),
None => return Ok(TickOutcome::WaitForSignal),
};
let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn);
// 1. store the starting date with the bitmap of processing tasks.
@@ -1008,7 +1010,23 @@ impl IndexScheduler {
// the `started_at` date times and `processings` of the current processing tasks.
// This date time is used by the task cancelation to store the right `started_at`
// date in the task on disk.
return Ok(0);
return Ok(TickOutcome::TickAgain(0));
}
// If an index said it was full, we need to:
// 1. identify which index is full
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0));
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
@@ -1048,7 +1066,7 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(processed_tasks)
Ok(TickOutcome::TickAgain(processed_tasks))
}
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
@@ -1083,6 +1101,16 @@ impl IndexScheduler {
}
}
/// The outcome of calling the [`IndexScheduler::tick`] function.
pub enum TickOutcome {
/// The scheduler should immediately attempt another `tick`.
///
/// The `usize` field contains the number of processed tasks.
TickAgain(usize),
/// The scheduler should wait for an external signal before attempting another `tick`.
WaitForSignal,
}
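A self-contained sketch of the loop this type drives; the `tick` closure and `wait` hook below are stand-ins for `IndexScheduler::tick` and the `wake_up` signal:

```rust
enum TickOutcome {
    TickAgain(usize),
    WaitForSignal,
}

// Keep ticking while there may be work; park on WaitForSignal.
fn run(mut tick: impl FnMut() -> Result<TickOutcome, String>, mut wait: impl FnMut()) {
    loop {
        match tick() {
            Ok(TickOutcome::TickAgain(_processed)) => continue, // more tasks may be ready
            Ok(TickOutcome::WaitForSignal) => wait(), // park until a new task registers
            Err(e) => {
                eprintln!("{e}");
                return; // the real loop sleeps and retries instead of returning
            }
        }
    }
}

fn main() {
    let mut remaining = 3; // bounded stand-in so the sketch terminates
    run(
        move || {
            remaining -= 1;
            if remaining == 0 {
                Err("stopping the sketch".into())
            } else {
                Ok(TickOutcome::TickAgain(1))
            }
        },
        || {},
    );
}
```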
#[cfg(test)]
mod tests {
use std::io::{BufWriter, Seek, Write};
@@ -1092,9 +1120,7 @@ mod tests {
use crossbeam::channel::RecvTimeoutError;
use file_store::File;
use meili_snap::snapshot;
use meilisearch_auth::AuthFilter;
use meilisearch_types::document_formats::DocumentFormatError;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::{
ReplaceDocuments, UpdateDocuments,
@@ -1680,6 +1706,105 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
}
#[test]
fn document_addition_and_document_deletion() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn document_deletion_and_document_addition() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
// The deletion should have failed because it can't create an index
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
// The addition should work
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn do_not_batch_task_of_different_indexes() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@@ -2246,45 +2371,38 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { limit: Some(0), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[]");
let query = Query { limit: Some(1), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
let query = Query { limit: Some(2), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
let query = Query { from: Some(1), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query { from: Some(2), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]");
let query = Query { from: Some(1), limit: Some(1), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
let query = Query { from: Some(1), limit: Some(2), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
}
@@ -2309,24 +2427,21 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { statuses: Some(vec![Status::Processing]), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,]"); // only the processing tasks in the first tick
let query = Query { statuses: Some(vec![Status::Enqueued]), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]"); // only the enqueued tasks in the first tick
let query = Query {
statuses: Some(vec![Status::Enqueued, Status::Processing]),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,]"); // both enqueued and processing tasks in the first tick
let query = Query {
@@ -2334,9 +2449,8 @@ mod tests {
after_started_at: Some(start_time),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test, which should exclude the enqueued tasks
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
@@ -2346,9 +2460,8 @@ mod tests {
before_started_at: Some(start_time),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should exclude all of them
snapshot!(snapshot_bitmap(&tasks), @"[]");
@@ -2359,9 +2472,8 @@ mod tests {
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// both enqueued and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should exclude the enqueued tasks and include the only processing task
@@ -2386,9 +2498,8 @@ mod tests {
before_started_at: Some(start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the test and before one minute after the start of the test,
// which should include all tasks
@@ -2399,9 +2510,8 @@ mod tests {
before_started_at: Some(start_time),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes before the start of the test, which should exclude all tasks
snapshot!(snapshot_bitmap(&tasks), @"[]");
@@ -2412,9 +2522,8 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// both succeeded and processing tasks in the first tick, but limited to those with a started_at
// that comes after the start of the second part of the test and before one minute after the
// second start of the test, which should exclude all tasks
@@ -2432,9 +2541,8 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// we run the same query to verify that, and indeed find that the last task is matched
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@@ -2444,9 +2552,8 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// enqueued, succeeded, or processing tasks started after the second part of the test, should
// again only return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@@ -2456,9 +2563,8 @@ mod tests {
// now the last task should have failed
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "end");
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// so running the last query should return nothing
snapshot!(snapshot_bitmap(&tasks), @"[]");
@@ -2468,9 +2574,8 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@@ -2480,9 +2585,8 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// but the same query on failed tasks should return the last task
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
@@ -2493,9 +2597,8 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// same query but with an invalid uid
snapshot!(snapshot_bitmap(&tasks), @"[]");
@@ -2506,9 +2609,8 @@ mod tests {
before_started_at: Some(second_start_time + Duration::minutes(1)),
..Default::default()
};
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// same query but with a valid uid
snapshot!(snapshot_bitmap(&tasks), @"[2,]");
}
@@ -2538,9 +2640,8 @@ mod tests {
let rtxn = index_scheduler.env.read_txn().unwrap();
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// only the first task associated with catto is returned, the indexSwap tasks are excluded!
snapshot!(snapshot_bitmap(&tasks), @"[0,]");
@@ -2549,9 +2650,7 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.unwrap();
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
@@ -2563,9 +2662,7 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
@@ -2577,14 +2674,10 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]
.into_iter()
.collect(),
),
&Some(vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
@@ -2592,9 +2685,8 @@ mod tests {
snapshot!(snapshot_bitmap(&tasks), @"[0,1,]");
let query = Query::default();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// we asked for all the tasks with all index authorized -> all tasks returned
snapshot!(snapshot_bitmap(&tasks), @"[0,1,2,3,]");
}
@@ -2625,9 +2717,8 @@ mod tests {
let rtxn = index_scheduler.read_txn().unwrap();
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(&rtxn, &query, &AuthFilter::default())
.unwrap();
let tasks =
index_scheduler.get_task_ids_from_authorized_indexes(&rtxn, &query, &None).unwrap();
// 0 is not returned because it was not canceled, 3 is not returned because it is the uid of the
// taskCancelation itself
snapshot!(snapshot_bitmap(&tasks), @"[1,2,]");
@@ -2637,9 +2728,7 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&AuthFilter::with_allowed_indexes(
vec![IndexUidPattern::new_unchecked("doggo")].into_iter().collect(),
),
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.unwrap();
// Return only task 1 because the user is not authorized to see task 2

View File

@@ -0,0 +1,42 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -0,0 +1,9 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 3,
"doggo": "bork"
}
]

View File

@@ -0,0 +1,37 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -0,0 +1,43 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -0,0 +1,45 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -0,0 +1,17 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 1,
"doggo": "jean bob"
},
{
"id": 2,
"catto": "jorts"
},
{
"id": 3,
"doggo": "bork"
}
]

View File

@@ -0,0 +1,36 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -0,0 +1,40 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -439,20 +439,29 @@ impl IndexScheduler {
provided_ids: received_document_ids,
deleted_documents,
} => {
if let Some(deleted_documents) = deleted_documents {
assert_eq!(status, Status::Succeeded);
assert!(deleted_documents <= received_document_ids as u64);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, documents_ids) =
if let KindWithContent::DocumentDeletion {
ref index_uid,
ref documents_ids,
} = kind
{
(index_uid, documents_ids)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
match &kind {
KindWithContent::DocumentDeletion { index_uid, documents_ids } => {
assert_eq!(&task_index_uid.unwrap(), index_uid);
assert!(documents_ids.len() >= received_document_ids);
}
_ => panic!(),
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.unwrap() <= received_document_ids as u64);
assert!(documents_ids.len() == received_document_ids);
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
assert!(documents_ids.len() == received_document_ids);
}
} else {
assert_ne!(status, Status::Succeeded);
}
}
Details::ClearAll { deleted_documents } => {

View File

@@ -85,13 +85,17 @@ impl AuthController {
uid: Uuid,
search_rules: Option<SearchRules>,
) -> Result<AuthFilter> {
let mut filters = AuthFilter::default();
let key = self.get_key(uid)?;
let key_authorized_indexes = SearchRules::Set(key.indexes.into_iter().collect());
filters.search_rules = match search_rules {
Some(search_rules) => search_rules,
None => SearchRules::Set(key.indexes.into_iter().collect()),
};
let allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
filters.allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
Ok(AuthFilter { search_rules, key_authorized_indexes, allow_index_creation })
Ok(filters)
}
pub fn list_keys(&self) -> Result<Vec<Key>> {
@@ -156,59 +160,13 @@ impl AuthController {
}
pub struct AuthFilter {
search_rules: Option<SearchRules>,
key_authorized_indexes: SearchRules,
allow_index_creation: bool,
pub search_rules: SearchRules,
pub allow_index_creation: bool,
}
impl Default for AuthFilter {
fn default() -> Self {
Self {
search_rules: None,
key_authorized_indexes: SearchRules::default(),
allow_index_creation: true,
}
}
}
impl AuthFilter {
#[inline]
pub fn allow_index_creation(&self) -> bool {
self.allow_index_creation
}
pub fn with_allowed_indexes(allowed_indexes: HashSet<IndexUidPattern>) -> Self {
Self {
search_rules: None,
key_authorized_indexes: SearchRules::Set(allowed_indexes),
allow_index_creation: false,
}
}
pub fn all_indexes_authorized(&self) -> bool {
self.key_authorized_indexes.all_indexes_authorized()
&& self
.search_rules
.as_ref()
.map(|search_rules| search_rules.all_indexes_authorized())
.unwrap_or(true)
}
pub fn is_index_authorized(&self, index: &str) -> bool {
self.key_authorized_indexes.is_index_authorized(index)
&& self
.search_rules
.as_ref()
.map(|search_rules| search_rules.is_index_authorized(index))
.unwrap_or(true)
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
if !self.is_index_authorized(index) {
return None;
}
let search_rules = self.search_rules.as_ref().unwrap_or(&self.key_authorized_indexes);
search_rules.get_index_search_rules(index)
Self { search_rules: SearchRules::default(), allow_index_creation: true }
}
}
@@ -227,7 +185,7 @@ impl Default for SearchRules {
}
impl SearchRules {
fn is_index_authorized(&self, index: &str) -> bool {
pub fn is_index_authorized(&self, index: &str) -> bool {
match self {
Self::Set(set) => {
set.contains("*")
@@ -242,7 +200,7 @@ impl SearchRules {
}
}
fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
match self {
Self::Set(_) => {
if self.is_index_authorized(index) {
@@ -261,10 +219,24 @@ impl SearchRules {
}
}
fn all_indexes_authorized(&self) -> bool {
/// Return the list of indexes such that `self.is_index_authorized(index) == true`,
/// or `None` if all indexes satisfy this condition.
pub fn authorized_indexes(&self) -> Option<Vec<IndexUidPattern>> {
match self {
SearchRules::Set(set) => set.contains("*"),
SearchRules::Map(map) => map.contains_key("*"),
SearchRules::Set(set) => {
if set.contains("*") {
None
} else {
Some(set.iter().cloned().collect())
}
}
SearchRules::Map(map) => {
if map.contains_key("*") {
None
} else {
Some(map.keys().cloned().collect())
}
}
}
}
}
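A minimal runnable sketch of the authorization contract exercised above, using a stand-in `Rules` type instead of the real `SearchRules` (illustrative only): a set containing `"*"` authorizes every index, and `authorized_indexes` returns `None` in that case to mean "all indexes".

```rust
use std::collections::BTreeSet;

// Stand-in for the `SearchRules::Set` variant (hypothetical, for illustration).
struct Rules(BTreeSet<String>);

impl Rules {
    // Mirrors `is_index_authorized`: "*" authorizes everything.
    fn is_index_authorized(&self, index: &str) -> bool {
        self.0.contains("*") || self.0.contains(index)
    }

    // Mirrors `authorized_indexes`: `None` means every index is authorized.
    fn authorized_indexes(&self) -> Option<Vec<String>> {
        if self.0.contains("*") {
            None
        } else {
            Some(self.0.iter().cloned().collect())
        }
    }
}

fn main() {
    let doggo_only = Rules(["doggo".to_string()].into_iter().collect());
    assert!(doggo_only.is_index_authorized("doggo"));
    assert!(!doggo_only.is_index_authorized("catto"));
    assert_eq!(doggo_only.authorized_indexes(), Some(vec!["doggo".to_string()]));

    let all = Rules(["*".to_string()].into_iter().collect());
    assert!(all.is_index_authorized("catto"));
    assert_eq!(all.authorized_indexes(), None);
}
```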

View File

@@ -15,7 +15,7 @@ actix-web = { version = "4.2.1", default-features = false }
anyhow = "1.0.65"
convert_case = "0.6.0"
csv = "1.1.6"
deserr = "0.4.1"
deserr = "0.5.0"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }

View File

@@ -19,7 +19,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
pub enum PayloadType {
Ndjson,
Json,
Csv,
Csv { delimiter: u8 },
}
impl fmt::Display for PayloadType {
@@ -27,7 +27,7 @@ impl fmt::Display for PayloadType {
match self {
PayloadType::Ndjson => f.write_str("ndjson"),
PayloadType::Json => f.write_str("json"),
PayloadType::Csv => f.write_str("csv"),
PayloadType::Csv { .. } => f.write_str("csv"),
}
}
}
@@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from the input and writes an obkv batch to the writer.
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::Reader::from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;
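A hedged usage sketch of the new three-argument `read_csv` (the caller name, path, and in-memory writer are illustrative assumptions; the sketch assumes it sits in this module so the `Result` alias and `DocumentFormatError` are in scope):

```rust
use std::fs::File;
use std::io::Cursor;

// Count the documents of a semicolon-separated payload (illustrative caller).
fn count_semicolon_documents(path: &str) -> Result<u64> {
    let file = File::open(path).map_err(DocumentFormatError::Io)?;
    let mut writer = Cursor::new(Vec::new());
    read_csv(&file, &mut writer, b';')
}
```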

View File

@@ -220,6 +220,7 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;

View File

@@ -25,7 +25,7 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", "
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
deserr = "0.4.1"
deserr = "0.5.0"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
@@ -116,5 +116,5 @@ japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.5/build.zip"
sha1 = "6fe959b78511b32e9ff857fd9fd31740633b9fce"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.6/build.zip"
sha1 = "dce0aba16bceab5549edf9f01de89858800f7422"

View File

@@ -26,18 +26,6 @@ impl SearchAggregator {
pub fn succeed(&mut self, _: &dyn Any) {}
}
#[derive(Default)]
pub struct MultiSearchAggregator;
#[allow(dead_code)]
impl MultiSearchAggregator {
pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
Self::default()
}
pub fn succeed(&mut self) {}
}
impl MockAnalytics {
#[allow(clippy::new_ret_no_self)]
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
@@ -55,7 +43,6 @@ impl Analytics for MockAnalytics {
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
fn get_search(&self, _aggregate: super::SearchAggregator) {}
fn post_search(&self, _aggregate: super::SearchAggregator) {}
fn post_multi_search(&self, _aggregate: super::MultiSearchAggregator) {}
fn add_documents(
&self,
_documents_query: &UpdateDocumentsQuery,

View File

@@ -23,8 +23,6 @@ use crate::routes::tasks::TasksFilterQuery;
pub type SegmentAnalytics = mock_analytics::MockAnalytics;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type SearchAggregator = mock_analytics::SearchAggregator;
#[cfg(any(debug_assertions, not(feature = "analytics")))]
pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
// if we are in release mode and the feature analytics was enabled
// we use the real analytics
@@ -32,8 +30,6 @@ pub type MultiSearchAggregator = mock_analytics::MultiSearchAggregator;
pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type SearchAggregator = segment_analytics::SearchAggregator;
#[cfg(all(not(debug_assertions), feature = "analytics"))]
pub type MultiSearchAggregator = segment_analytics::MultiSearchAggregator;
/// The Meilisearch config dir:
/// `~/.config/Meilisearch` on *NIX or *BSD.
@@ -78,9 +74,6 @@ pub trait Analytics: Sync + Send {
/// This method should be called to aggregate a post search
fn post_search(&self, aggregate: SearchAggregator);
/// This method should be called to aggregate a post array of searches
fn post_multi_search(&self, aggregate: MultiSearchAggregator);
// this method should be called to aggregate an add documents request
fn add_documents(
&self,

View File

@@ -9,7 +9,7 @@ use actix_web::HttpRequest;
use byte_unit::Byte;
use http::header::CONTENT_TYPE;
use index_scheduler::IndexScheduler;
use meilisearch_auth::{AuthController, AuthFilter};
use meilisearch_auth::{AuthController, SearchRules};
use meilisearch_types::InstanceUid;
use once_cell::sync::Lazy;
use regex::Regex;
@@ -30,7 +30,7 @@ use crate::routes::indexes::documents::UpdateDocumentsQuery;
use crate::routes::tasks::TasksFilterQuery;
use crate::routes::{create_all_stats, Stats};
use crate::search::{
SearchQuery, SearchQueryWithIndex, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
SearchQuery, SearchResult, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
};
use crate::Opt;
@@ -68,7 +68,6 @@ pub enum AnalyticsMsg {
BatchMessage(Track),
AggregateGetSearch(SearchAggregator),
AggregatePostSearch(SearchAggregator),
AggregatePostMultiSearch(MultiSearchAggregator),
AggregateAddDocuments(DocumentsAggregator),
AggregateDeleteDocuments(DocumentsDeletionAggregator),
AggregateUpdateDocuments(DocumentsAggregator),
@@ -134,7 +133,6 @@ impl SegmentAnalytics {
opt: opt.clone(),
batcher,
post_search_aggregator: SearchAggregator::default(),
post_multi_search_aggregator: MultiSearchAggregator::default(),
get_search_aggregator: SearchAggregator::default(),
add_documents_aggregator: DocumentsAggregator::default(),
delete_documents_aggregator: DocumentsDeletionAggregator::default(),
@@ -176,10 +174,6 @@ impl super::Analytics for SegmentAnalytics {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
}
fn post_multi_search(&self, aggregate: MultiSearchAggregator) {
let _ = self.sender.try_send(AnalyticsMsg::AggregatePostMultiSearch(aggregate));
}
fn add_documents(
&self,
documents_query: &UpdateDocumentsQuery,
@@ -330,7 +324,6 @@ pub struct Segment {
batcher: AutoBatcher,
get_search_aggregator: SearchAggregator,
post_search_aggregator: SearchAggregator,
post_multi_search_aggregator: MultiSearchAggregator,
add_documents_aggregator: DocumentsAggregator,
delete_documents_aggregator: DocumentsDeletionAggregator,
update_documents_aggregator: DocumentsAggregator,
@@ -388,7 +381,6 @@ impl Segment {
Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregatePostMultiSearch(agreg)) => self.post_multi_search_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateDeleteDocuments(agreg)) => self.delete_documents_aggregator.aggregate(agreg),
Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
@@ -407,7 +399,7 @@ impl Segment {
auth_controller: AuthController,
) {
if let Ok(stats) =
create_all_stats(index_scheduler.into(), auth_controller, &AuthFilter::default())
create_all_stats(index_scheduler.into(), auth_controller, &SearchRules::default())
{
// Replace the version number with the prototype name if any.
let version = if let Some(prototype) = crate::prototype_name() {
@@ -434,8 +426,6 @@ impl Segment {
.into_event(&self.user, "Documents Searched GET");
let post_search = std::mem::take(&mut self.post_search_aggregator)
.into_event(&self.user, "Documents Searched POST");
let post_multi_search = std::mem::take(&mut self.post_multi_search_aggregator)
.into_event(&self.user, "Documents Searched by Multi-Search POST");
let add_documents = std::mem::take(&mut self.add_documents_aggregator)
.into_event(&self.user, "Documents Added");
let delete_documents = std::mem::take(&mut self.delete_documents_aggregator)
@@ -453,9 +443,6 @@ impl Segment {
if let Some(post_search) = post_search {
let _ = self.batcher.push(post_search).await;
}
if let Some(post_multi_search) = post_multi_search {
let _ = self.batcher.push(post_multi_search).await;
}
if let Some(add_documents) = add_documents {
let _ = self.batcher.push(add_documents).await;
}
@@ -729,118 +716,6 @@ impl SearchAggregator {
}
}
#[derive(Default)]
pub struct MultiSearchAggregator {
timestamp: Option<OffsetDateTime>,
// requests
total_received: usize,
total_succeeded: usize,
// sum of the number of distinct indexes in each single request, use with total_received to compute an avg
total_distinct_index_count: usize,
// number of queries with a single index, use with total_received to compute a proportion
total_single_index: usize,
// sum of the number of search queries in the requests, use with total_received to compute an average
total_search_count: usize,
// context
user_agents: HashSet<String>,
}
impl MultiSearchAggregator {
pub fn from_queries(query: &[SearchQueryWithIndex], request: &HttpRequest) -> Self {
let timestamp = Some(OffsetDateTime::now_utc());
let user_agents = extract_user_agents(request).into_iter().collect();
let distinct_indexes: HashSet<_> =
query.iter().map(|query| query.index_uid.as_str()).collect();
Self {
timestamp,
total_received: 1,
total_succeeded: 0,
total_distinct_index_count: distinct_indexes.len(),
total_single_index: if distinct_indexes.len() == 1 { 1 } else { 0 },
total_search_count: query.len(),
user_agents,
}
}
pub fn succeed(&mut self) {
self.total_succeeded = self.total_succeeded.saturating_add(1);
}
pub fn aggregate(&mut self, other: Self) {
// write the aggregate in a way that will cause a compilation error if a field is added.
// get ownership of self, replacing it by a default value.
let this = std::mem::take(self);
let timestamp = this.timestamp.or(other.timestamp);
let total_received = this.total_received.saturating_add(other.total_received);
let total_succeeded = this.total_succeeded.saturating_add(other.total_succeeded);
let total_distinct_index_count =
this.total_distinct_index_count.saturating_add(other.total_distinct_index_count);
let total_single_index = this.total_single_index.saturating_add(other.total_single_index);
let total_search_count = this.total_search_count.saturating_add(other.total_search_count);
let mut user_agents = this.user_agents;
for user_agent in other.user_agents.into_iter() {
user_agents.insert(user_agent);
}
// need all fields or compile error
let mut aggregated = Self {
timestamp,
total_received,
total_succeeded,
total_distinct_index_count,
total_single_index,
total_search_count,
user_agents,
// do not add _ or ..Default::default() here
};
// replace the default self with the aggregated value
std::mem::swap(self, &mut aggregated);
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
if self.total_received == 0 {
None
} else {
let properties = json!({
"user-agent": self.user_agents,
"requests": {
"total_succeeded": self.total_succeeded,
"total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panic
"total_received": self.total_received,
},
"indexes": {
"total_single_index": self.total_single_index,
"total_distinct_index_count": self.total_distinct_index_count,
"avg_distinct_index_count": (self.total_distinct_index_count as f64) / (self.total_received as f64), // not 0 else returned early
},
"searches": {
"total_search_count": self.total_search_count,
"avg_search_count": (self.total_search_count as f64) / (self.total_received as f64),
}
});
Some(Track {
timestamp: self.timestamp,
user: user.clone(),
event: event_name.to_string(),
properties,
..Default::default()
})
}
}
}
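The `aggregate` method above deliberately names every field so that adding one to the struct becomes a compile error. The same pattern in miniature, with a hypothetical two-field aggregator:

```rust
#[derive(Default)]
struct Counts {
    received: usize,
    succeeded: usize,
}

impl Counts {
    fn aggregate(&mut self, other: Self) {
        // Take ownership of self, leaving a default value behind.
        let this = std::mem::take(self);
        // Every field is named explicitly (no `..Default::default()`), so a
        // newly added field breaks this literal until it is aggregated too.
        let mut merged = Counts {
            received: this.received + other.received,
            succeeded: this.succeeded + other.succeeded,
        };
        std::mem::swap(self, &mut merged);
    }
}

fn main() {
    let mut a = Counts { received: 2, succeeded: 1 };
    a.aggregate(Counts { received: 3, succeeded: 3 });
    assert_eq!((a.received, a.succeeded), (5, 4));
}
```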
#[derive(Default)]
pub struct DocumentsAggregator {
timestamp: Option<OffsetDateTime>,

View File

@@ -11,6 +11,8 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@@ -52,6 +54,7 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,

View File

@@ -136,13 +136,6 @@ pub mod policies {
use crate::extractors::authentication::Policy;
enum TenantTokenOutcome {
NotATenantToken,
Invalid,
Expired,
Valid(Uuid, SearchRules),
}
fn tenant_token_validation() -> Validation {
let mut validation = Validation::default();
validation.validate_exp = false;
@@ -171,42 +164,29 @@ pub mod policies {
pub struct ActionPolicy<const A: u8>;
impl<const A: u8> Policy for ActionPolicy<A> {
/// Attempts to grant authentication from a bearer token (that can be a tenant token or an API key), the requested Action,
/// and a list of requested indexes.
///
/// If the bearer token is not allowed for the specified indexes and action, returns `None`.
/// Otherwise, returns an object containing the generated permissions: the search filters to add to a search, and the list of allowed indexes
/// (that may contain more indexes than requested).
fn authenticate(
auth: AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// authenticate if token is the master key.
// Without a master key, all routes are accessible except the key-related routes.
// master key can only have access to keys routes.
// if master key is None only keys routes are inaccessible.
if auth.get_master_key().map_or_else(|| !is_keys_action(A), |mk| mk == token) {
return Some(AuthFilter::default());
}
let (key_uuid, search_rules) =
match ActionPolicy::<A>::authenticate_tenant_token(&auth, token) {
TenantTokenOutcome::Valid(key_uuid, search_rules) => {
(key_uuid, Some(search_rules))
}
TenantTokenOutcome::Expired => return None,
TenantTokenOutcome::Invalid => return None,
TenantTokenOutcome::NotATenantToken => {
(auth.get_optional_uid_from_encoded_key(token.as_bytes()).ok()??, None)
}
};
// check that the indexes are allowed
let action = Action::from_repr(A)?;
let auth_filter = auth.get_key_filters(key_uuid, search_rules).ok()?;
if auth.is_key_authorized(key_uuid, action, index).unwrap_or(false)
&& index.map(|index| auth_filter.is_index_authorized(index)).unwrap_or(true)
// Tenant token
if let Some(filters) = ActionPolicy::<A>::authenticate_tenant_token(&auth, token, index)
{
return Some(auth_filter);
return Some(filters);
} else if let Some(action) = Action::from_repr(A) {
// API key
if let Ok(Some(uid)) = auth.get_optional_uid_from_encoded_key(token.as_bytes()) {
if let Ok(true) = auth.is_key_authorized(uid, action, index) {
return auth.get_key_filters(uid, None).ok();
}
}
}
None
@@ -214,43 +194,50 @@ pub mod policies {
}
impl<const A: u8> ActionPolicy<A> {
fn authenticate_tenant_token(auth: &AuthController, token: &str) -> TenantTokenOutcome {
fn authenticate_tenant_token(
auth: &AuthController,
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// A tenant token only has access to the search route which always defines an index.
let index = index?;
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return TenantTokenOutcome::NotATenantToken;
return None;
}
let uid = if let Some(uid) = extract_key_id(token) {
uid
} else {
return TenantTokenOutcome::NotATenantToken;
};
let uid = extract_key_id(token)?;
// check if parent key is authorized to do the action.
if auth.is_key_authorized(uid, Action::Search, Some(index)).ok()? {
// Check if tenant token is valid.
let key = auth.generate_key(uid)?;
let data = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
)
.ok()?;
// Check if tenant token is valid.
let key = if let Some(key) = auth.generate_key(uid) {
key
} else {
return TenantTokenOutcome::Invalid;
};
let data = if let Ok(data) = decode::<Claims>(
token,
&DecodingKey::from_secret(key.as_bytes()),
&tenant_token_validation(),
) {
data
} else {
return TenantTokenOutcome::Invalid;
};
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return TenantTokenOutcome::Expired;
// Check index access if an index restriction is provided.
if !data.claims.search_rules.is_index_authorized(index) {
return None;
}
// Check if token is expired.
if let Some(exp) = data.claims.exp {
if OffsetDateTime::now_utc().unix_timestamp() > exp {
return None;
}
}
return match auth.get_key_filters(uid, Some(data.claims.search_rules)) {
Ok(auth) if auth.search_rules.is_index_authorized(index) => Some(auth),
_ => None,
};
}
TenantTokenOutcome::Valid(uid, data.claims.search_rules)
None
}
}
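In outline, the rewritten `authenticate` tries three interpretations of the bearer token in order: the master key grants full access via `AuthFilter::default()`; a tenant token is accepted only for the search action, after checking the token's index restrictions and expiry against the parent key; anything else is treated as a plain API key and validated through `is_key_authorized`.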

View File

@@ -10,10 +10,10 @@ use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::ResponseError;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@@ -67,7 +67,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
@@ -156,16 +156,31 @@ pub async fn get_all_documents(
}
#[derive(Deserialize, Debug, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
#[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
pub primary_key: Option<String>,
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidIndexCsvDelimiter>, error = DeserrQueryParamError<InvalidIndexCsvDelimiter>)]
pub csv_delimiter: Option<u8>,
}
pub async fn add_documents(
fn from_char_csv_delimiter(
c: char,
) -> Result<Option<u8>, DeserrQueryParamError<InvalidIndexCsvDelimiter>> {
if c.is_ascii() {
Ok(Some(c as u8))
} else {
Err(DeserrQueryParamError::new(
format!("csv delimiter must be an ascii character. Found: `{}`", c),
Code::InvalidIndexCsvDelimiter,
))
}
}
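The same validation rule in isolation, as a minimal runnable sketch (`check_delimiter` is a hypothetical standalone stand-in for the deserr-integrated function above):

```rust
fn check_delimiter(c: char) -> Result<u8, String> {
    if c.is_ascii() {
        Ok(c as u8)
    } else {
        Err(format!("csv delimiter must be an ascii character. Found: `{}`", c))
    }
}

fn main() {
    assert_eq!(check_delimiter(';'), Ok(b';'));
    assert!(check_delimiter('🍰').is_err());
}
```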
pub async fn replace_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@@ -177,12 +192,13 @@ pub async fn add_documents(
analytics.add_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
@@ -195,7 +211,7 @@ pub async fn add_documents(
pub async fn update_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@@ -203,15 +219,17 @@ pub async fn update_documents(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = document_addition(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.into_inner().primary_key,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
@@ -221,26 +239,43 @@ pub async fn update_documents(
Ok(HttpResponse::Accepted().json(task))
}
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
) {
(Some(("application", "json")), None) => PayloadType::Json,
(Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
(Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
(Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
(Some(("application", "json")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/json",
)))
}
(Some(("application", "x-ndjson")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/x-ndjson",
)))
}
(Some((type_, subtype)), _) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
None => {
(None, _) => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
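For example (illustrative request, not part of the diff): sending `POST /indexes/movies/documents?csvDelimiter=;` with `Content-Type: text/csv` selects `PayloadType::Csv { delimiter: b';' }`, while the same `csvDelimiter` parameter combined with `Content-Type: application/json` is rejected with `CsvDelimiterWithWrongContentType`, reported under the `invalid_content_type` error code.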
@@ -285,7 +320,9 @@ async fn document_addition(
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.

View File

@@ -89,11 +89,11 @@ pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let filters = index_scheduler.filters();
let search_rules = &index_scheduler.filters().search_rules;
let indexes: Vec<_> = index_scheduler.indexes()?;
let indexes = indexes
.into_iter()
.filter(|(name, _)| filters.is_index_authorized(name))
.filter(|(name, _)| search_rules.is_index_authorized(name))
.map(|(name, index)| IndexView::new(name, &index))
.collect::<Result<Vec<_>, _>>()?;
@@ -120,8 +120,7 @@ pub async fn create_index(
) -> Result<HttpResponse, ResponseError> {
let IndexCreateRequest { primary_key, uid } = body.into_inner();
// FIXME: allow_index_creation?
let allow_index_creation = index_scheduler.filters().is_index_authorized(&uid);
let allow_index_creation = index_scheduler.filters().search_rules.is_index_authorized(&uid);
if allow_index_creation {
analytics.publish(
"Index Created".to_string(),

View File

@@ -3,6 +3,7 @@ use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -16,9 +17,9 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH,
DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@@ -100,6 +101,26 @@ impl From<SearchQueryGet> for SearchQuery {
}
}
/// Incorporate search rules in search query
fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
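A self-contained sketch of that merge behavior (`merge_filters` is an illustrative stand-in; the real function mutates the `SearchQuery` in place):

```rust
use serde_json::{json, Value};

fn merge_filters(filter: Option<Value>, rules_filter: Option<Value>) -> Option<Value> {
    match (filter, rules_filter) {
        (None, rules_filter) => rules_filter,
        (filter, None) => filter,
        (Some(filter), Some(rules_filter)) => {
            let filter = match filter {
                Value::Array(filter) => filter,
                filter => vec![filter],
            };
            let rules_filter = match rules_filter {
                Value::Array(rules_filter) => rules_filter,
                rules_filter => vec![rules_filter],
            };
            Some(Value::Array([filter, rules_filter].concat()))
        }
    }
}

fn main() {
    // A user filter and a tenant-token filter end up AND-combined in one array.
    let merged = merge_filters(Some(json!("genre = comedy")), Some(json!("user_id = 1")));
    assert_eq!(merged, Some(json!(["genre = comedy", "user_id = 1"])));
}
```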
// TODO: TAMO: split on :asc, and :desc, instead of doing some weird things
/// Transform the sort query parameter into something that matches the post expected format.
@@ -138,7 +159,9 @@ pub async fn search_with_url_query(
let mut query: SearchQuery = params.into_inner().into();
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
@@ -170,7 +193,9 @@ pub async fn search_with_post(
debug!("search called with params: {:?}", query);
// Tenant token search_rules.
if let Some(search_rules) = index_scheduler.filters().get_index_search_rules(&index_uid) {
if let Some(search_rules) =
index_scheduler.filters().search_rules.get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}

View File

@@ -45,7 +45,7 @@ macro_rules! make_setting_route {
let new_settings = Settings { $attr: Setting::Reset.into(), ..Default::default() };
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
@@ -86,7 +86,7 @@ macro_rules! make_setting_route {
..Default::default()
};
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let task = KindWithContent::SettingsUpdate {
index_uid: index_uid.to_string(),
@@ -560,7 +560,7 @@ pub async fn update_all(
Some(&req),
);
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,
@@ -596,7 +596,7 @@ pub async fn delete_all(
let new_settings = Settings::cleared().into_unchecked();
let allow_index_creation = index_scheduler.filters().allow_index_creation();
let allow_index_creation = index_scheduler.filters().allow_index_creation;
let index_uid = IndexUid::try_from(index_uid.into_inner())?.into_inner();
let task = KindWithContent::SettingsUpdate {
index_uid,

View File

@@ -33,7 +33,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::resource("/stats").route(web::get().to(get_stats)))
.service(web::resource("/version").route(web::get().to(get_version)))
.service(web::scope("/indexes").configure(indexes::configure))
.service(web::scope("/multi-search").configure(multi_search::configure))
.service(web::scope("/swap-indexes").configure(swap_indexes::configure));
}
@@ -238,9 +237,10 @@ async fn get_stats(
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
let filters = index_scheduler.filters();
let search_rules = &index_scheduler.filters().search_rules;
let stats = create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), filters)?;
let stats =
create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), search_rules)?;
debug!("returns: {:?}", stats);
Ok(HttpResponse::Ok().json(stats))
@@ -249,19 +249,19 @@ async fn get_stats(
pub fn create_all_stats(
index_scheduler: Data<IndexScheduler>,
auth_controller: AuthController,
filters: &meilisearch_auth::AuthFilter,
search_rules: &meilisearch_auth::SearchRules,
) -> Result<Stats, ResponseError> {
let mut last_task: Option<OffsetDateTime> = None;
let mut indexes = BTreeMap::new();
let mut database_size = 0;
let processing_task = index_scheduler.get_tasks_from_authorized_indexes(
Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
filters,
search_rules.authorized_indexes(),
)?;
// accumulate the size of each indexes
let processing_index = processing_task.first().and_then(|task| task.index_uid());
for (name, index) in index_scheduler.indexes()? {
if !filters.is_index_authorized(&name) {
if !search_rules.is_index_authorized(&name) {
continue;
}
@@ -327,5 +327,3 @@ pub async fn get_health(
Ok(HttpResponse::Ok().json(serde_json::json!({ "status": "available" })))
}
mod multi_search;

View File

@@ -1,87 +0,0 @@
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::actions;
use serde::Serialize;
use crate::analytics::{Analytics, MultiSearchAggregator};
use crate::extractors::authentication::policies::ActionPolicy;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_search, SearchQueryWithIndex, SearchResultWithIndex,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(multi_search_with_post))));
}
#[derive(Serialize)]
struct SearchResults {
results: Vec<SearchResultWithIndex>,
}
#[derive(Debug, deserr::Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueries {
queries: Vec<SearchQueryWithIndex>,
}
pub async fn multi_search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
params: AwebJson<SearchQueries, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
let queries = params.into_inner().queries;
let mut multi_aggregate = MultiSearchAggregator::from_queries(&queries, &req);
let search_results: Result<_, ResponseError> = (|| {
async {
let mut search_results = Vec::with_capacity(queries.len());
for (index_uid, mut query) in
queries.into_iter().map(SearchQueryWithIndex::into_index_query)
{
debug!("search called with params: {:?}", query);
// Check index from API key
if !index_scheduler.filters().is_index_authorized(&index_uid) {
return Err(AuthenticationError::InvalidToken.into());
}
// Apply search rules from tenant token
if let Some(search_rules) =
index_scheduler.filters().get_index_search_rules(&index_uid)
{
add_search_rules(&mut query, search_rules);
}
let index = index_scheduler.index(&index_uid)?;
let search_result =
tokio::task::spawn_blocking(move || perform_search(&index, query)).await?;
search_results.push(SearchResultWithIndex {
index_uid: index_uid.into_inner(),
result: search_result?,
});
}
Ok(search_results)
}
})()
.await;
if search_results.is_ok() {
multi_aggregate.succeed();
}
analytics.post_multi_search(multi_aggregate);
let search_results = search_results?;
debug!("returns: {:?}", search_results);
Ok(HttpResponse::Ok().json(SearchResults { results: search_results }))
}

View File

@@ -42,7 +42,7 @@ pub async fn swap_indexes(
}),
Some(&req),
);
let filters = index_scheduler.filters();
let search_rules = &index_scheduler.filters().search_rules;
let mut swaps = vec![];
for SwapIndexesPayload { indexes } in params.into_iter() {
@@ -53,7 +53,7 @@ pub async fn swap_indexes(
return Err(MeilisearchHttpError::SwapIndexPayloadWrongLength(indexes).into());
}
};
if !filters.is_index_authorized(lhs) || !filters.is_index_authorized(rhs) {
if !search_rules.is_index_authorized(lhs) || !search_rules.is_index_authorized(rhs) {
return Err(AuthenticationError::InvalidToken.into());
}
swaps.push(IndexSwap { indexes: (lhs.to_string(), rhs.to_string()) });

View File

@@ -319,7 +319,7 @@ async fn cancel_tasks(
let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
&index_scheduler.filters().search_rules.authorized_indexes(),
)?;
let task_cancelation =
KindWithContent::TaskCancelation { query: format!("?{}", req.query_string()), tasks };
@@ -364,7 +364,7 @@ async fn delete_tasks(
let tasks = index_scheduler.get_task_ids_from_authorized_indexes(
&index_scheduler.read_txn()?,
&query,
index_scheduler.filters(),
&index_scheduler.filters().search_rules.authorized_indexes(),
)?;
let task_deletion =
KindWithContent::TaskDeletion { query: format!("?{}", req.query_string()), tasks };
@@ -398,7 +398,10 @@ async fn get_tasks(
let query = params.into_query();
let mut tasks_results: Vec<TaskView> = index_scheduler
.get_tasks_from_authorized_indexes(query, index_scheduler.filters())?
.get_tasks_from_authorized_indexes(
query,
index_scheduler.filters().search_rules.authorized_indexes(),
)?
.into_iter()
.map(|t| TaskView::from_task(&t))
.collect();
@@ -436,8 +439,12 @@ async fn get_task(
let query = index_scheduler::Query { uids: Some(vec![task_uid]), ..Query::default() };
if let Some(task) =
index_scheduler.get_tasks_from_authorized_indexes(query, index_scheduler.filters())?.first()
if let Some(task) = index_scheduler
.get_tasks_from_authorized_indexes(
query,
index_scheduler.filters().search_rules.authorized_indexes(),
)?
.first()
{
let task_view = TaskView::from_task(task);
Ok(HttpResponse::Ok().json(task_view))


@@ -5,10 +5,8 @@ use std::time::Instant;
use deserr::Deserr;
use either::Either;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
@@ -76,100 +74,6 @@ impl SearchQuery {
}
}
/// A `SearchQuery` + an index UID.
// This struct contains the fields of `SearchQuery` inline.
// This is because neither deserr nor serde supports `flatten` when using `deny_unknown_fields`.
// The `From<SearchQueryWithIndex>` implementation ensures both structs remain up to date.
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryWithIndex {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
pub index_uid: IndexUid,
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
pub q: Option<String>,
#[deserr(default = DEFAULT_SEARCH_OFFSET(), error = DeserrJsonError<InvalidSearchOffset>)]
pub offset: usize,
#[deserr(default = DEFAULT_SEARCH_LIMIT(), error = DeserrJsonError<InvalidSearchLimit>)]
pub limit: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchPage>)]
pub page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHitsPerPage>)]
pub hits_per_page: Option<usize>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToRetrieve>)]
pub attributes_to_retrieve: Option<BTreeSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToCrop>)]
pub attributes_to_crop: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropLength>, default = DEFAULT_CROP_LENGTH())]
pub crop_length: usize,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToHighlight>)]
pub attributes_to_highlight: Option<HashSet<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchShowMatchesPosition>, default)]
pub show_matches_position: bool,
#[deserr(default, error = DeserrJsonError<InvalidSearchFilter>)]
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
pub highlight_pre_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPostTag>, default = DEFAULT_HIGHLIGHT_POST_TAG())]
pub highlight_post_tag: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchCropMarker>, default = DEFAULT_CROP_MARKER())]
pub crop_marker: String,
#[deserr(default, error = DeserrJsonError<InvalidSearchMatchingStrategy>, default)]
pub matching_strategy: MatchingStrategy,
}
impl SearchQueryWithIndex {
pub fn into_index_query(self) -> (IndexUid, SearchQuery) {
let SearchQueryWithIndex {
index_uid,
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
} = self;
(
index_uid,
SearchQuery {
q,
offset,
limit,
page,
hits_per_page,
attributes_to_retrieve,
attributes_to_crop,
crop_length,
attributes_to_highlight,
show_matches_position,
filter,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
matching_strategy,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
)
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
@@ -216,14 +120,6 @@ pub struct SearchResult {
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResultWithIndex {
pub index_uid: String,
#[serde(flatten)]
pub result: SearchResult,
}
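For readers unfamiliar with `#[serde(flatten)]`, a self-contained toy showing why the result fields land next to `indexUid` on the wire; `Inner` and `Outer` are stand-ins, not the real types:

```rust
use serde::Serialize;

#[derive(Serialize)]
struct Inner {
    hits: Vec<String>,
    query: String,
}

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct Outer {
    index_uid: String,
    #[serde(flatten)]
    inner: Inner,
}

fn main() {
    let out = Outer {
        index_uid: "test".into(),
        inner: Inner { hits: vec![], query: "glass".into() },
    };
    // Prints: {"indexUid":"test","hits":[],"query":"glass"}
    println!("{}", serde_json::to_string(&out).unwrap());
}
```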
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[serde(untagged)]
pub enum HitsInfo {
@@ -233,26 +129,6 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
/// Incorporate search rules in search query
pub fn add_search_rules(query: &mut SearchQuery, rules: IndexSearchRules) {
query.filter = match (query.filter.take(), rules.filter) {
(None, rules_filter) => rules_filter,
(filter, None) => filter,
(Some(filter), Some(rules_filter)) => {
let filter = match filter {
Value::Array(filter) => filter,
filter => vec![filter],
};
let rules_filter = match rules_filter {
Value::Array(rules_filter) => rules_filter,
rules_filter => vec![rules_filter],
};
Some(Value::Array([filter, rules_filter].concat()))
}
}
}
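A worked example of the merge rule above, as a sketch: it assumes `IndexSearchRules` carries only an optional filter, and uses `..Default::default()` for brevity on the remaining `SearchQuery` fields:

```rust
// Both sides set a filter, so each is wrapped into a one-element array and
// the arrays are concatenated; milli ANDs the elements of a top-level array.
let mut query = SearchQuery { filter: Some(json!("genre = horror")), ..Default::default() };
let rules = IndexSearchRules { filter: Some(json!("user_id = 1")) };
add_search_rules(&mut query, rules);
assert_eq!(query.filter, Some(json!(["genre = horror", "user_id = 1"])));
```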
pub fn perform_search(
index: &Index,
query: SearchQuery,


@@ -11,7 +11,6 @@ use crate::common::Server;
pub static AUTHORIZATIONS: Lazy<HashMap<(&'static str, &'static str), HashSet<&'static str>>> =
Lazy::new(|| {
let mut authorizations = hashmap! {
("POST", "/multi-search") => hashset!{"search", "*"},
("POST", "/indexes/products/search") => hashset!{"search", "*"},
("GET", "/indexes/products/search") => hashset!{"search", "*"},
("POST", "/indexes/products/documents") => hashset!{"documents.add", "documents.*", "*"},


@@ -4,8 +4,6 @@ mod errors;
mod payload;
mod tenant_token;
mod tenant_token_multi_search;
use actix_web::http::StatusCode;
use serde_json::{json, Value};

File diff suppressed because it is too large


@@ -30,7 +30,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.json"),
("content-type", "application/json"),
vec![("content-type", "application/json")],
)
.await;
assert_eq!(code, 202);
@@ -46,7 +46,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.ndjson"),
("content-type", "application/x-ndjson"),
vec![("content-type", "application/x-ndjson")],
)
.await;
assert_eq!(code, 202);
@@ -96,6 +96,21 @@ impl Index<'_> {
self.service.post_encoded(url, documents, self.encoder).await
}
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
}
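A hedged usage sketch of the helper above; the payload and delimiter are illustrative:

```rust
// The query parameter is passed verbatim, so the caller supplies the `?`.
let (response, code) = index
    .raw_add_documents("#id|name\n0|kefir", Some("text/csv"), "?csvDelimiter=|")
    .await;
assert_eq!(code, 202);
```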
pub async fn update_documents(
&self,
documents: Value,
@@ -110,6 +125,21 @@ impl Index<'_> {
self.service.put_encoded(url, documents, self.encoder).await
}
pub async fn raw_update_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.put_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.put_str(url, payload, Vec::new()).await
}
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);


@@ -103,10 +103,6 @@ impl Server {
Index { uid: uid.as_ref().to_string(), service: &self.service, encoder }
}
pub async fn multi_search(&self, queries: Value) -> (Value, StatusCode) {
self.service.post("/multi-search", queries).await
}
pub async fn list_indexes_raw(&self, parameters: &str) -> (Value, StatusCode) {
self.service.get(format!("/indexes{parameters}")).await
}


@@ -34,17 +34,18 @@ impl Service {
self.request(req).await
}
/// Send a test post request from a text body, with a `content-type:application/json` header.
/// Send a test post request from a text body.
pub async fn post_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
header: (&str, &str),
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let req = test::TestRequest::post()
.uri(url.as_ref())
.set_payload(body.as_ref().to_string())
.insert_header(header);
let mut req =
test::TestRequest::post().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
self.request(req).await
}
@@ -57,6 +58,21 @@ impl Service {
self.put_encoded(url, body, Encoder::Plain).await
}
/// Send a test put request from a text body.
pub async fn put_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::put().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
self.request(req).await
}
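Because the headers are now a `Vec`, callers can send none, one, or several; a small sketch (the URL is illustrative):

```rust
// No headers at all, as needed by the missing-content-type tests...
let (_response, _code) = service.post_str("/indexes/pets/documents", "{}", Vec::new()).await;
// ...or several at once.
let (_response, _code) = service
    .post_str(
        "/indexes/pets/documents",
        "{}",
        vec![("content-type", "application/json"), ("accept-encoding", "gzip")],
    )
    .await;
```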
pub async fn put_encoded(
&self,
url: impl AsRef<str>,


@@ -216,6 +216,133 @@ async fn add_single_document_with_every_encoding() {
}
}
#[actix_rt::test]
async fn add_csv_document() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id,name,race
0,jean,bernese mountain
1,jorts,orange cat";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id|name|race
0|jean|bernese mountain
1|jorts|orange cat";
let (response, code) =
index.raw_update_documents(document, Some("text/csv"), "?csvDelimiter=|").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// any other content-type must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {
@@ -1027,6 +1154,53 @@ async fn error_document_field_limit_reached() {
@"");
}
#[actix_rt::test]
async fn add_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
index.add_documents(documents, None).await;
let response = index.wait_task(1).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn add_documents_invalid_geo_field() {
let server = Server::new().await;


@@ -1,5 +1,6 @@
use meili_snap::*;
use serde_json::json;
use urlencoding::encode;
use crate::common::Server;
@@ -97,3 +98,323 @@ async fn delete_documents_batch() {
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified this error is triggered first
let (response, code) = index.raw_update_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_update_documents(
"",
Some("application/json"),
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}


@@ -3,7 +3,6 @@
mod errors;
mod formatted;
mod multi;
mod pagination;
use once_cell::sync::Lazy;


@@ -1,343 +0,0 @@
use meili_snap::{json_string, snapshot};
use serde_json::json;
use super::{DOCUMENTS, NESTED_DOCUMENTS};
use crate::common::Server;
#[actix_rt::test]
async fn search_empty_list() {
let server = Server::new().await;
let (response, code) = server.multi_search(json!({"queries": []})).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response), @r###"
{
"results": []
}
"###);
}
#[actix_rt::test]
async fn search_json_object() {
let server = Server::new().await;
let (response, code) = server.multi_search(json!({})).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Missing field `queries`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn search_json_array() {
let server = Server::new().await;
let (response, code) = server.multi_search(json!([])).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value type: expected an object, but found an array: `[]`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn simple_search_single_index() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "test", "q": "captain"},
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
"hits": [
{
"title": "Glass",
"id": "450465"
}
],
"query": "glass",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
},
{
"indexUid": "test",
"hits": [
{
"title": "Captain Marvel",
"id": "299537"
}
],
"query": "captain",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
]
"###);
}
#[actix_rt::test]
async fn simple_search_missing_index_uid() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"q": "glass"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Missing field `indexUid` inside `.queries[0]`",
"code": "missing_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_index_uid"
}
"###);
}
#[actix_rt::test]
async fn simple_search_illegal_index_uid() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid": "hé", "q": "glass"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, @r###"
{
"message": "Invalid value at `.queries[0].indexUid`: `hé` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"code": "invalid_index_uid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_uid"
}
"###);
}
#[actix_rt::test]
async fn simple_search_two_indexes() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["results"], { "[].processingTimeMs" => "[time]" }, @r###"
[
{
"indexUid": "test",
"hits": [
{
"title": "Glass",
"id": "450465"
}
],
"query": "glass",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
},
{
"indexUid": "nested",
"hits": [
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pesti"
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8
}
],
"cattos": [
"simba",
"pestiféré"
]
}
],
"query": "pesti",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
]
"###);
}
#[actix_rt::test]
async fn search_one_index_doesnt_exist() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `nested` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
}
#[actix_rt::test]
async fn search_multiple_indexes_dont_exist() {
let server = Server::new().await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass"},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"404 Not Found");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test` not found.",
"code": "index_not_found",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#index_not_found"
}
"###);
}
#[actix_rt::test]
async fn search_one_query_error() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
{"indexUid": "nested", "q": "pesti"},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
}
"###);
}
#[actix_rt::test]
async fn search_multiple_query_errors() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
let index = server.index("nested");
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (response, code) = server
.multi_search(json!({"queries": [
{"indexUid" : "test", "q": "glass", "facets": ["title"]},
{"indexUid": "nested", "q": "pesti", "facets": ["doggos"]},
]}))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, this index does not have configured filterable attributes.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
}
"###);
}


@@ -19,7 +19,7 @@ byteorder = "1.4.3"
charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
deserr = "0.4.1"
deserr = "0.5.0"
either = "1.8.0"
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"


@@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@@ -427,8 +427,26 @@ pub fn snap_settings(index: &Index) -> String {
snap
}
pub fn snap_documents(index: &Index) -> String {
let mut snap = String::new();
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let display = fields_ids_map.ids().collect::<Vec<_>>();
for document in index.all_documents(&rtxn).unwrap() {
let doc = obkv_to_json(&display, &fields_ids_map, document.unwrap().1).unwrap();
snap.push_str(&serde_json::to_string(&doc).unwrap());
snap.push('\n');
}
snap
}
#[macro_export]
macro_rules! full_snap_of_db {
($index:ident, documents) => {{
$crate::snapshot_tests::snap_documents(&$index)
}};
($index:ident, settings) => {{
$crate::snapshot_tests::snap_settings(&$index)
}};


@@ -395,6 +395,7 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
(Some(_), None) => Ok(Err(MissingLongitude { document_id: debug_id() })),
(None, None) => Ok(Err(MissingLatitudeAndLongitude { document_id: debug_id() })),
},
Value::Null => Ok(Ok(())),
value => Ok(Err(NotAnObject { document_id: debug_id(), value })),
}
}


@@ -59,6 +59,7 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)


@@ -6,6 +6,7 @@ use roaring::RoaringBitmap;
use super::read_u32_ne_bytes;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
@@ -57,21 +58,6 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
Ok(obkvs.last().unwrap().clone())
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
.iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current);
let mut buffer = Vec::new();
merge_two_obkvs(first, second, &mut buffer);
Cow::from(buffer)
})
.unwrap())
}
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@@ -88,6 +74,41 @@ pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffe
writer.finish().unwrap();
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs_and_operations<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
// [add, add, delete, add, add]
// we can ignore everything that happened before the last delete.
let starting_position =
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
// [add, add, delete]
// if the last operation was a deletion then we simply return the deletion
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
{
return Ok(obkvs[obkvs.len() - 1].clone());
}
let mut buffer = Vec::new();
// (add, add, delete) [add, add]
// in the other case, no deletion will be encountered during the merge
let mut ret =
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current[1..]);
merge_two_obkvs(first, second, &mut buffer);
// we want the result of the merge into our accumulator
std::mem::swap(&mut acc, &mut buffer);
acc
});
ret.insert(0, Operation::Addition as u8);
Ok(Cow::from(ret))
}
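A self-contained toy of the scan rule above, assuming the same one-byte tag convention (here 0 = addition, 1 = deletion):

```rust
// Keep only the entries at or after the last deletion marker: everything
// written before it can never influence the merged result.
fn relevant_ops(ops: &[Vec<u8>]) -> &[Vec<u8>] {
    let start = ops.iter().rposition(|op| op[0] == 1).unwrap_or(0);
    &ops[start..]
}

fn main() {
    // [add "a", delete, add "b", add "c"]: the first addition is dead weight.
    let ops = vec![vec![0, b'a'], vec![1], vec![0, b'b'], vec![0, b'c']];
    assert_eq!(relevant_ops(&ops), &ops[1..]);
}
```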
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],


@@ -13,9 +13,9 @@ pub use grenad_helpers::{
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
serialize_roaring_bitmap, MergeFn,
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
};
use crate::MAX_WORD_LENGTH;


@@ -79,6 +79,7 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
progress: FP,
should_abort: FA,
added_documents: u64,
deleted_documents: u64,
}
#[derive(Default, Debug, Clone)]
@@ -122,6 +123,7 @@ where
wtxn,
index,
added_documents: 0,
deleted_documents: 0,
})
}
@@ -166,6 +168,30 @@ where
Ok((self, Ok(indexed_documents)))
}
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
// Early return when there is no document to remove
if to_delete.is_empty() {
return Ok((self, Ok(0)));
}
let deleted_documents = self
.transform
.as_mut()
.expect("Invalid document deletion state")
.remove_documents(to_delete, self.wtxn, &self.should_abort)?
as u64;
self.deleted_documents += deleted_documents;
Ok((self, Ok(deleted_documents)))
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
if self.added_documents == 0 {
@@ -1879,4 +1905,328 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) = builder.remove_documents(vec![S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":3,"name":"jean","age":25}
"###);
}
#[test]
fn add_update_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 5,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn add_document_and_in_another_transform_update_and_delete_documents() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 3,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":2,"doggo":{"name":"bob","age":20}}
{"id":3,"name":"jean","age":25}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn delete_document_and_then_add_documents_in_the_same_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":2,"doggo":{"name":"jean","age":20}}
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn delete_the_same_document_multiple_time() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn add_document_and_in_another_transform_delete_the_document_then_add_it_again() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"catto":"jorts"}
"###);
}
}


@@ -12,7 +12,9 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
use super::helpers::{
create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@@ -50,8 +52,12 @@ pub struct Transform<'a, 'i> {
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
// Both grenads follow the same format:
// key | value
// u32 | one `Operation` byte, then the obkv of the stored document
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
// To increase the cache locality and decrease the heap usage we use compact smartstring.
@@ -59,6 +65,14 @@ pub struct Transform<'a, 'i> {
documents_count: usize,
}
/// This enum is specific to the grenad sorter stored in the transform.
/// It's used as the first byte of the grenad entries and tells you whether the document was an addition or a deletion.
#[repr(u8)]
pub enum Operation {
Addition,
Deletion,
}
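A sketch of the entry layout this enum implies; the payload bytes are stand-ins:

```rust
// An addition is the tag byte followed by the document's obkv bytes,
// while a deletion is the tag byte alone.
let obkv_bytes: &[u8] = &[0xDE, 0xAD]; // stand-in for a real obkv
let mut addition = vec![Operation::Addition as u8];
addition.extend_from_slice(obkv_bytes);
let deletion = [Operation::Deletion as u8];
assert_eq!(addition.len(), obkv_bytes.len() + 1);
assert_eq!(deletion.len(), 1);
```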
/// Create a mapping between the field ids found in the document batch and the one that were
/// already present in the index.
///
@@ -94,7 +108,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
};
// We initialize the sorter with the user indexing settings.
@@ -151,9 +165,7 @@ impl<'a, 'i> Transform<'a, 'i> {
FA: Fn() -> bool + Sync,
{
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
let primary_key = cursor.primary_key().to_string();
@@ -161,6 +173,7 @@ impl<'a, 'i> Transform<'a, 'i> {
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
let mut obkv_buffer = Vec::new();
let mut document_sorter_buffer = Vec::new();
let mut documents_count = 0;
let mut docid_buffer: Vec<u8> = Vec::new();
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
@@ -212,10 +225,13 @@ impl<'a, 'i> Transform<'a, 'i> {
Entry::Occupied(entry) => *entry.get() as u32,
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a replaced document.
// It'll be deleted later. We keep its original docid to insert it in the grenad.
// It'll be deleted later.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
original_docid = Some(docid);
// If it was already in the list of replaced documents it means it was deleted
// by the `remove_documents` method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
}
let docid = self
.available_documents_ids
@@ -248,26 +264,46 @@ impl<'a, 'i> Transform<'a, 'i> {
skip_insertion = true;
} else {
// we associate the base document with the new key, everything will get merged later.
self.original_sorter.insert(docid.to_be_bytes(), base_obkv)?;
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(base_obkv);
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(buffer) => {
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
}
if !skip_insertion {
self.new_documents_ids.insert(docid);
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&obkv_buffer);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?;
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
None => {
self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?
Some(flattened_obkv) => {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
documents_count += 1;
@@ -293,6 +329,73 @@ impl<'a, 'i> Transform<'a, 'i> {
Ok(documents_count)
}
/// The counterpart of `read_documents` that removes documents either from the transform or the database.
/// It can be called before, after, or in between two calls of `read_documents`.
///
/// It needs to update all the internal data structures in the transform.
/// - If the document is coming from the database -> it's marked as a to_delete document
/// - If the document to remove was inserted by the `read_documents` method before AND was present in the db,
/// it's marked as `to_delete` + added into the grenad to ensure we don't reinsert it.
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
wtxn: &mut heed::RwTxn,
should_abort: FA,
) -> Result<usize>
where
FA: Fn() -> bool + Sync,
{
// there may be duplicates in the documents to remove.
to_remove.sort_unstable();
to_remove.dedup();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mut documents_deleted = 0;
for to_remove in to_remove {
if should_abort() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
// if the document was added in a previous iteration of the transform we mark it as deleted in the sorters.
Entry::Occupied(entry) => {
let doc_id = *entry.get() as u32;
self.original_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
self.flattened_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
// we must NOT update the list of replaced_documents_ids
// Either:
// 1. It's already in it and there is nothing to do
// 2. It wasn't in it because the document was created by a previous batch and since
// we're removing it there is nothing to do.
self.new_documents_ids.remove(doc_id);
entry.remove_entry();
}
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a `to_delete` document.
// It'll be deleted later. We don't need to push anything to the sorters.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
} else {
// if the document is nowhere to be found, there is nothing to do and we must NOT
// increment the count of documents_deleted
continue;
}
}
};
documents_deleted += 1;
}
Ok(documents_deleted)
}
// Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
@@ -487,6 +590,11 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut documents_count = 0;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
// send a callback to show at which step we are
documents_count += 1;
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
@@ -518,9 +626,18 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
tempfile::tempfile()?,
);
// Once we have written all the documents into the final sorter, we write the documents
// into this writer, extract the file and reset the seek to be able to read it again.
self.flattened_sorter.write_into_stream_writer(&mut writer)?;
// Once we have written all the documents into the final sorter, we write the nested documents
// into this writer.
// We get rid of the `Operation` byte and skip the deleted documents as well.
let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
writer.insert(key, val)?;
}
let mut flattened_documents = writer.into_inner()?;
flattened_documents.rewind()?;
@@ -701,3 +818,45 @@ impl TransformOutput {
.collect())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn merge_obkvs() {
let mut doc_0 = Vec::new();
let mut kv_writer = KvWriter::new(&mut doc_0);
kv_writer.insert(0_u8, [0]).unwrap();
kv_writer.finish().unwrap();
doc_0.insert(0, Operation::Addition as u8);
let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
)
.unwrap();
assert_eq!(*ret, [Operation::Deletion as u8]);
let ret = merge_obkvs_and_operations(
&[],
&[
Cow::from([Operation::Addition as u8, 1].as_slice()),
Cow::from([Operation::Deletion as u8].as_slice()),
Cow::from(doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, doc_0);
}
}