Compare commits

7 Commits

Author  SHA1  Message  Date
Louis Dureuil  7ac1eafceb  add facet test  2023-02-09 13:36:21 +01:00
Louis Dureuil  82ca61f366  Remove some clippy type complexity warns by deboxing iters  2023-02-09 13:36:21 +01:00
Louis Dureuil  e8b8319006  Add min and max facet stats  2023-02-09 13:36:21 +01:00
Louis Dureuil  6df9177b3b  Update usage of iterators  2023-02-09 13:36:21 +01:00
Louis Dureuil  8656f83a81  facet sort ascending/descending now also return the values  2023-02-09 13:36:21 +01:00
Louis Dureuil  f2d0672453  Add prototype to analytics if any  2023-02-09 13:35:06 +01:00
Louis Dureuil  59cf58e773  If using a prototype, display its name at Meilisearch startup  2023-02-09 13:35:05 +01:00
94 changed files with 1551 additions and 3271 deletions

@@ -3,7 +3,7 @@
# check_tag $current_tag $file_tag $file_name
function check_tag {
if [[ "$1" != "$2" ]]; then
echo "Error: the current tag does not match the version in Cargo.toml: found $2 - expected $1"
echo "Error: the current tag does not match the version in $3: found $2 - expected $1"
ret=1
fi
}
@@ -11,8 +11,12 @@ function check_tag {
ret=0
current_tag=${GITHUB_REF#'refs/tags/v'}
file_tag="$(grep '^version = ' Cargo.toml | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag
toml_files='*/Cargo.toml'
for toml_file in $toml_files;
do
file_tag="$(grep '^version = ' $toml_file | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')"
check_tag $current_tag $file_tag $toml_file
done
lock_file='Cargo.lock'
lock_tag=$(grep -A 1 'name = "meilisearch-auth"' $lock_file | grep version | cut -d '=' -f 2 | tr -d '"' | tr -d ' ')
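
For context, the updated check can be exercised locally by simulating the tag ref that CI provides. The script path and tag below are assumptions for illustration, not taken from this diff:

```sh
# Simulate the ref the release workflow passes (illustrative tag value).
export GITHUB_REF='refs/tags/v1.0.0'
# Assumed script location; with the change above it now verifies the version
# in every */Cargo.toml against the tag, not just the root Cargo.toml.
bash .github/scripts/check-release.sh
```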

@@ -7,8 +7,7 @@ WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV COMMIT_SHA=${COMMIT_SHA} COMMIT_DATE=${COMMIT_DATE}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .

@@ -92,7 +92,6 @@ jobs:
build-args: |
COMMIT_SHA=${{ github.sha }}
COMMIT_DATE=${{ steps.build-metadata.outputs.date }}
GIT_TAG=${{ github.ref_name }}
# /!\ Don't touch this without checking with Cloud team
- name: Send CI information to Cloud team

@@ -2,9 +2,6 @@ name: Rust
on:
workflow_dispatch:
schedule:
# Everyday at 5:00am
- cron: '0 5 * * *'
pull_request:
push:
# trying and staging branches are for Bors config
@@ -30,18 +27,10 @@ jobs:
run: |
apt-get update && apt-get install -y curl
apt-get install build-essential -y
- name: Run test with Rust stable
if: github.event_name != 'schedule'
uses: actions-rs/toolchain@v1
- uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
- name: Run test with Rust nightly
if: github.event_name == 'schedule'
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
# Disable cache due to disk space issues with Windows workers in CI
# - name: Cache dependencies
# uses: Swatinem/rust-cache@v2.2.0

@@ -29,7 +29,7 @@ jobs:
run: |
raw_new_version=$(echo $NEW_VERSION | cut -d 'v' -f 2)
new_string="version = \"$raw_new_version\""
sd '^version = "\d+.\d+.\w+"$' "$new_string" Cargo.toml
sd '^version = "\d+.\d+.\w+"$' "$new_string" */Cargo.toml
- name: Build Meilisearch to update Cargo.lock
run: cargo build
- name: Commit and push the changes to the ${{ env.NEW_BRANCH }} branch

.gitignore vendored (2 changes)

@@ -1,5 +1,3 @@
.idea/
.vscode/
/target
**/*.csv
**/*.json_lines

@@ -121,19 +121,15 @@ The full Meilisearch release process is described in [this guide](https://github
Depending on the developed feature, you might need to provide a prototyped version of Meilisearch to make it easier to test by the users.
The prototype name must follow this convention: `prototype-X-Y` where
- `X` is the feature name formatted in `kebab-case`. It should not end with a single number.
- `X` is the feature name formatted in `kebab-case`
- `Y` is the version of the prototype, starting from `0`.
Example: `prototype-auto-resize-0`. </br>
❌ Bad example: `auto-resize-0`: lacks the `prototype` prefix. </br>
❌ Bad example: `prototype-auto-resize`: lacks the version suffix. </br>
❌ Bad example: `prototype-auto-resize-0-0`: feature name ends with a single number.
Example: `prototype-auto-resize-0`.
Steps to create a prototype:
1. In your terminal, go to the last commit of your branch (the one you want to provide as a prototype).
2. Create a tag following the convention: `git tag prototype-X-Y`
3. Run Meilisearch and check that its launch summary features a line: `Prototype: prototype-X-Y` (you may need to switch branches and back after tagging for this to work).
3. Push the tag: `git push origin prototype-X-Y`
4. Check the [Docker CI](https://github.com/meilisearch/meilisearch/actions/workflows/publish-docker-images.yml) is now running.
@@ -142,7 +138,7 @@ More information about [how to run Meilisearch with Docker](https://docs.meilise
⚙️ However, no binaries will be created. If the users do not use Docker, they can go to the `prototype-X-Y` tag in the Meilisearch repository and compile from the source code.
⚠️ When sharing a prototype with users, remind them to not use it in production. Prototypes are solely for test purposes.
⚠️ When sharing a prototype with users, prevent them from using it in production. Prototypes are only for test purposes.
### Release assets
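
As a concrete sketch of the prototype steps described above (the feature branch name is illustrative, and the Docker image tag is assumed to mirror the git tag):

```sh
# Steps 1-3: tag the last commit of the feature branch and push the tag.
git checkout my-feature-branch
git tag prototype-auto-resize-0
git push origin prototype-auto-resize-0
# Step 4: once the Docker CI has finished, users can try the prototype, e.g.:
docker pull getmeili/meilisearch:prototype-auto-resize-0
```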

Cargo.lock generated (59 changes)

@@ -36,9 +36,9 @@ dependencies = [
[[package]]
name = "actix-http"
version = "3.3.0"
version = "3.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0070905b2c4a98d184c4e81025253cb192aa8a73827553f38e9410801ceb35bb"
checksum = "0c83abf9903e1f0ad9973cc4f7b9767fd5a03a583f51a5b7a339e07987cd2724"
dependencies = [
"actix-codec",
"actix-rt",
@@ -46,7 +46,7 @@ dependencies = [
"actix-tls",
"actix-utils",
"ahash",
"base64 0.21.0",
"base64 0.13.1",
"bitflags",
"brotli",
"bytes",
@@ -68,10 +68,7 @@ dependencies = [
"rand",
"sha1",
"smallvec",
"tokio",
"tokio-util",
"tracing",
"zstd 0.12.3+zstd.1.5.2",
]
[[package]]
@@ -167,9 +164,9 @@ dependencies = [
[[package]]
name = "actix-web"
version = "4.3.0"
version = "4.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "464e0fddc668ede5f26ec1f9557a8d44eda948732f40c6b0ad79126930eb775f"
checksum = "d48f7b6534e06c7bfc72ee91db7917d4af6afe23e7d223b51e68fffbb21e96b9"
dependencies = [
"actix-codec",
"actix-http",
@@ -609,9 +606,9 @@ dependencies = [
[[package]]
name = "cargo_toml"
version = "0.14.1"
version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bfbc36312494041e2cdd5f06697b7e89d4b76f42773a0b5556ac290ff22acc2"
checksum = "497049e9477329f8f6a559972ee42e117487d01d1e8c2cc9f836ea6fa23a9e1a"
dependencies = [
"serde",
"toml",
@@ -1113,26 +1110,20 @@ dependencies = [
[[package]]
name = "deserr"
version = "0.5.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c71c14985c842bf1e520b1ebcd22daff6aeece32f510e11f063cecf9b308c04b"
checksum = "28380303ca15ec07e1d5b079baf19cf849b09edad5cab219c1c51b2bd07523de"
dependencies = [
"actix-http",
"actix-utils",
"actix-web",
"deserr-internal",
"futures",
"serde-cs",
"serde_json",
"serde_urlencoded",
"strsim",
]
[[package]]
name = "deserr-internal"
version = "0.5.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cae1c51b191528c9e4e5d6cff671de94f61fcda1c206cc891251e0cf438c941a"
checksum = "860928cd8af78d223a3d70dd581f21d7c3de8aa2eecd938e0c0a399ded7c1451"
dependencies = [
"convert_case 0.5.0",
"proc-macro2",
@@ -2536,7 +2527,6 @@ dependencies = [
"base64 0.13.1",
"enum-iterator",
"hmac",
"maplit",
"meilisearch-types",
"rand",
"roaring",
@@ -4432,7 +4422,7 @@ dependencies = [
"pbkdf2",
"sha1",
"time",
"zstd 0.11.2+zstd.1.5.2",
"zstd",
]
[[package]]
@@ -4441,16 +4431,7 @@ version = "0.11.2+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4"
dependencies = [
"zstd-safe 5.0.2+zstd.1.5.2",
]
[[package]]
name = "zstd"
version = "0.12.3+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806"
dependencies = [
"zstd-safe 6.0.4+zstd.1.5.4",
"zstd-safe",
]
[[package]]
@@ -4463,21 +4444,11 @@ dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-safe"
version = "6.0.4+zstd.1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7afb4b54b8910cf5447638cb54bf4e8a65cbedd783af98b98c62ffe91f185543"
dependencies = [
"libc",
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.7+zstd.1.5.4"
version = "2.0.5+zstd.1.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94509c3ba2fe55294d752b79842c530ccfab760192521df74a081a78d2b3c7f5"
checksum = "edc50ffce891ad571e9f9afe5039c4837bede781ac4bb13052ed7ae695518596"
dependencies = [
"cc",
"libc",

@@ -16,15 +16,6 @@ members = [
"benchmarks"
]
[workspace.package]
version = "1.0.0"
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
homepage = "https://meilisearch.com"
readme = "README.md"
edition = "2021"
license = "MIT"
[profile.release]
codegen-units = 1

@@ -1,30 +1,31 @@
# Compile
FROM rust:bullseye AS compiler
FROM rust:alpine3.16 AS compiler
RUN apk add -q --update-cache --no-cache build-base openssl-dev
WORKDIR /meilisearch
ARG COMMIT_SHA
ARG COMMIT_DATE
ARG GIT_TAG
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE} VERGEN_GIT_SEMVER_LIGHTWEIGHT=${GIT_TAG}
ENV VERGEN_GIT_SHA=${COMMIT_SHA} VERGEN_GIT_COMMIT_TIMESTAMP=${COMMIT_DATE}
ENV RUSTFLAGS="-C target-feature=-crt-static"
COPY . .
RUN set -eux; \
arch="$(dpkg --print-architecture)"; \
if [ "$arch" = "aarch64" ]; then \
apkArch="$(apk --print-arch)"; \
if [ "$apkArch" = "aarch64" ]; then \
export JEMALLOC_SYS_WITH_LG_PAGE=16; \
fi && \
cargo build --release
# Run
FROM debian:11.6
FROM alpine:3.16
ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apt update -q \
&& apt install -q -y tini
RUN apk update --quiet \
&& apk add -q --no-cache libgcc tini curl
# add meilisearch to the `/bin` so you can run it from anywhere and it's easy
# to find.

@@ -1,15 +1,9 @@
[package]
name = "benchmarks"
version = "1.0.0"
edition = "2018"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
anyhow = "1.0.65"
csv = "1.1.6"

@@ -1,14 +1,7 @@
[package]
name = "dump"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
edition.workspace = true
homepage.workspace = true
readme.workspace = true
license.workspace = true
version = "1.0.0"
edition = "2021"
[dependencies]
anyhow = "1.0.65"

@@ -203,11 +203,12 @@ pub(crate) mod test {
use big_s::S;
use maplit::btreeset;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self};
use meilisearch_types::settings::{Checked, Settings};
use meilisearch_types::star_or::StarOr;
use meilisearch_types::tasks::{Details, Status};
use serde_json::{json, Map, Value};
use time::macros::datetime;
@@ -340,7 +341,7 @@ pub(crate) mod test {
name: Some(S("doggos_key")),
uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(),
actions: vec![Action::DocumentsAll],
indexes: vec![IndexUidPattern::from_str("doggos").unwrap()],
indexes: vec![StarOr::Other(IndexUid::from_str("doggos").unwrap())],
expires_at: Some(datetime!(4130-03-14 12:21 UTC)),
created_at: datetime!(1960-11-15 0:00 UTC),
updated_at: datetime!(2022-11-10 0:00 UTC),
@@ -350,7 +351,7 @@ pub(crate) mod test {
name: Some(S("master_key")),
uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(),
actions: vec![Action::All],
indexes: vec![IndexUidPattern::all()],
indexes: vec![StarOr::Star],
expires_at: None,
created_at: datetime!(0000-01-01 00:01 UTC),
updated_at: datetime!(1964-05-04 17:25 UTC),

@@ -181,8 +181,10 @@ impl CompatV5ToV6 {
.indexes
.into_iter()
.map(|index| match index {
v5::StarOr::Star => v6::IndexUidPattern::all(),
v5::StarOr::Other(uid) => v6::IndexUidPattern::new_unchecked(uid.as_str()),
v5::StarOr::Star => v6::StarOr::Star,
v5::StarOr::Other(uid) => {
v6::StarOr::Other(v6::IndexUid::new_unchecked(uid.as_str()))
}
})
.collect(),
expires_at: key.expires_at,

@@ -34,7 +34,8 @@ pub type PaginationSettings = meilisearch_types::settings::PaginationSettings;
// everything related to the api keys
pub type Action = meilisearch_types::keys::Action;
pub type IndexUidPattern = meilisearch_types::index_uid_pattern::IndexUidPattern;
pub type StarOr<T> = meilisearch_types::star_or::StarOr<T>;
pub type IndexUid = meilisearch_types::index_uid::IndexUid;
// everything related to the errors
pub type ResponseError = meilisearch_types::error::ResponseError;

@@ -1,14 +1,7 @@
[package]
name = "file-store"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
version = "1.0.0"
edition = "2021"
[dependencies]
tempfile = "3.3.0"

@@ -1,16 +1,10 @@
[package]
name = "filter-parser"
version = "1.0.0"
edition = "2021"
description = "The parser for the Meilisearch filter syntax"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
nom = "7.1.1"
nom_locate = "4.0.0"

@@ -1,17 +1,11 @@
[package]
name = "flatten-serde-json"
version = "1.0.0"
edition = "2021"
description = "Flatten serde-json objects like elastic search"
readme = "README.md"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
# readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde_json = "1.0"

@@ -1,14 +1,7 @@
[package]
name = "index-scheduler"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
version = "1.0.0"
edition = "2021"
[dependencies]
anyhow = "1.0.64"

@@ -88,11 +88,11 @@ pub enum BatchKind {
DocumentClear {
ids: Vec<TaskId>,
},
DocumentOperation {
DocumentImport {
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
import_ids: Vec<TaskId>,
},
DocumentDeletion {
deletion_ids: Vec<TaskId>,
@@ -102,12 +102,12 @@ pub enum BatchKind {
allow_index_creation: bool,
settings_ids: Vec<TaskId>,
},
SettingsAndDocumentOperation {
SettingsAndDocumentImport {
settings_ids: Vec<TaskId>,
method: IndexDocumentsMethod,
allow_index_creation: bool,
primary_key: Option<String>,
operation_ids: Vec<TaskId>,
import_ids: Vec<TaskId>,
},
Settings {
allow_index_creation: bool,
@@ -131,9 +131,9 @@ impl BatchKind {
#[rustfmt::skip]
fn allow_index_creation(&self) -> Option<bool> {
match self {
BatchKind::DocumentOperation { allow_index_creation, .. }
BatchKind::DocumentImport { allow_index_creation, .. }
| BatchKind::ClearAndSettings { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentOperation { allow_index_creation, .. }
| BatchKind::SettingsAndDocumentImport { allow_index_creation, .. }
| BatchKind::Settings { allow_index_creation, .. } => Some(*allow_index_creation),
_ => None,
}
@@ -141,8 +141,8 @@ impl BatchKind {
fn primary_key(&self) -> Option<Option<&str>> {
match self {
BatchKind::DocumentOperation { primary_key, .. }
| BatchKind::SettingsAndDocumentOperation { primary_key, .. } => {
BatchKind::DocumentImport { primary_key, .. }
| BatchKind::SettingsAndDocumentImport { primary_key, .. } => {
Some(primary_key.as_deref())
}
_ => None,
@@ -173,22 +173,22 @@ impl BatchKind {
if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() =>
{
(
Continue(BatchKind::DocumentOperation {
Continue(BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key: pk,
operation_ids: vec![task_id],
import_ids: vec![task_id],
}),
allow_index_creation,
)
}
// if the primary key set in the task was different than ours we should stop and make this batch fail asap.
K::DocumentImport { method, allow_index_creation, primary_key } => (
Break(BatchKind::DocumentOperation {
Break(BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key,
operation_ids: vec![task_id],
import_ids: vec![task_id],
}),
allow_index_creation,
),
@@ -249,7 +249,7 @@ impl BatchKind {
(
BatchKind::DocumentClear { mut ids }
| BatchKind::DocumentDeletion { deletion_ids: mut ids }
| BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids }
| BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids }
| BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids },
K::IndexDeletion,
) => {
@@ -258,7 +258,7 @@ impl BatchKind {
}
(
BatchKind::ClearAndSettings { settings_ids: mut ids, allow_index_creation: _, mut other }
| BatchKind::SettingsAndDocumentOperation { operation_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
| BatchKind::SettingsAndDocumentImport { import_ids: mut ids, method: _, allow_index_creation: _, primary_key: _, settings_ids: mut other },
K::IndexDeletion,
) => {
ids.push(id);
@@ -278,108 +278,63 @@ impl BatchKind {
K::DocumentImport { .. } | K::Settings { .. },
) => Break(this),
(
BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids },
BatchKind::DocumentImport { method: _, allow_index_creation: _, primary_key: _, import_ids: mut ids },
K::DocumentClear,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentClear { ids: operation_ids })
ids.push(id);
Continue(BatchKind::DocumentClear { ids })
}
// we can autobatch the same kind of document additions / updates
(
BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids },
BatchKind::DocumentImport { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut import_ids },
K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
method: ReplaceDocuments,
allow_index_creation,
operation_ids,
import_ids,
primary_key: pk,
})
}
(
BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
BatchKind::DocumentImport { method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
import_ids.push(id);
Continue(BatchKind::DocumentImport {
method: UpdateDocuments,
allow_index_creation,
primary_key: pk,
operation_ids,
import_ids,
})
}
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids },
K::DocumentDeletion,
) => {
operation_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids,
})
}
// but we can't autobatch documents if it's not the same kind
// this match branch MUST be AFTER the previous one
(
this @ BatchKind::DocumentOperation { .. },
K::DocumentImport { .. },
this @ BatchKind::DocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::DocumentOperation { method, allow_index_creation, primary_key, operation_ids },
BatchKind::DocumentImport { method, allow_index_creation, primary_key, import_ids },
K::Settings { .. },
) => Continue(BatchKind::SettingsAndDocumentOperation {
) => Continue(BatchKind::SettingsAndDocumentImport {
settings_ids: vec![id],
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
}),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentClear) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentClear { ids: deletion_ids })
}
// we can autobatch the deletion and import if the index already exists
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if index_already_exists => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can autobatch the deletion and import if both can't create an index
(
BatchKind::DocumentDeletion { mut deletion_ids },
K::DocumentImport { method, allow_index_creation, primary_key }
) if !allow_index_creation => {
deletion_ids.push(id);
Continue(BatchKind::DocumentOperation {
method,
allow_index_creation,
primary_key,
operation_ids: deletion_ids,
})
}
// we can't autobatch a deletion and an import if the index does not exists but would be created by an addition
(
this @ BatchKind::DocumentDeletion { .. },
K::DocumentImport { .. }
) => {
Break(this)
}
(this @ BatchKind::DocumentDeletion { .. }, K::DocumentImport { .. }) => Break(this),
(BatchKind::DocumentDeletion { mut deletion_ids }, K::DocumentDeletion) => {
deletion_ids.push(id);
Continue(BatchKind::DocumentDeletion { deletion_ids })
@@ -448,60 +403,60 @@ impl BatchKind {
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: _, mut operation_ids, allow_index_creation, primary_key: _ },
BatchKind::SettingsAndDocumentImport { settings_ids, method: _, import_ids: mut other, allow_index_creation, primary_key: _ },
K::DocumentClear,
) => {
operation_ids.push(id);
other.push(id);
Continue(BatchKind::ClearAndSettings {
settings_ids,
other: operation_ids,
other,
allow_index_creation,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: ReplaceDocuments, mut operation_ids, allow_index_creation, primary_key: _},
BatchKind::SettingsAndDocumentImport { settings_ids, method: ReplaceDocuments, mut import_ids, allow_index_creation, primary_key: _},
K::DocumentImport { method: ReplaceDocuments, primary_key: pk2, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method: ReplaceDocuments,
allow_index_creation,
primary_key: pk2,
operation_ids,
import_ids,
})
}
(
BatchKind::SettingsAndDocumentOperation { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids },
BatchKind::SettingsAndDocumentImport { settings_ids, method: UpdateDocuments, allow_index_creation, primary_key: _, mut import_ids },
K::DocumentImport { method: UpdateDocuments, primary_key: pk2, .. },
) => {
operation_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
import_ids.push(id);
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method: UpdateDocuments,
allow_index_creation,
primary_key: pk2,
operation_ids,
import_ids,
})
}
// But we can't batch a settings and a doc op with another doc op
// this MUST be AFTER the two previous branch
(
this @ BatchKind::SettingsAndDocumentOperation { .. },
this @ BatchKind::SettingsAndDocumentImport { .. },
K::DocumentDeletion | K::DocumentImport { .. },
) => Break(this),
(
BatchKind::SettingsAndDocumentOperation { mut settings_ids, method, allow_index_creation,primary_key, operation_ids },
BatchKind::SettingsAndDocumentImport { mut settings_ids, method, allow_index_creation,primary_key, import_ids },
K::Settings { .. },
) => {
settings_ids.push(id);
Continue(BatchKind::SettingsAndDocumentOperation {
Continue(BatchKind::SettingsAndDocumentImport {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
})
}
(
@@ -633,29 +588,29 @@ mod tests {
fn autobatch_simple_operation_together() {
// we can autobatch one or multiple `ReplaceDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
// we can autobatch one or multiple `UpdateDocuments` together.
// if the index exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
// if it doesn't exists.
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1, 2] }, false))");
// we can autobatch one or multiple DocumentDeletion together
debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
@@ -673,83 +628,56 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [settings(true), settings(true), settings(true)]), @"Some((Settings { allow_index_creation: true, settings_ids: [0, 1, 2] }, true))");
debug_snapshot!(autobatch_from(false,None, [settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))");
// We can autobatch document addition with document deletion
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
// And the other way around
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###);
}
#[test]
fn simple_document_operation_dont_autobatch_with_other() {
// addition, updates and deletion can't batch together
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
}
#[test]
fn document_addition_batch_with_settings() {
// simple case
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
// multiple settings and doc addition
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [2, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
// addition and setting unordered
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1, 3], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 2] }, true))");
// We ensure this kind of batch doesn't batch with forbidden operations
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: UpdateDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
}
#[test]
@@ -861,73 +789,67 @@ mod tests {
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(false), doc_clr(), idx_del()]), @"Some((IndexDeletion { ids: [1, 3, 0, 2] }, false))");
// The third and final case is when the first task doesn't create an index but is directly followed by a task creating an index. In this case we can't batch whith what
// follows because we first need to process the erronous batch.
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments,false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), settings(true), doc_clr(), idx_del()]), @"Some((DocumentImport { method: UpdateDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
}
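The comment above covers the third case: the index does not exist and the head task is not allowed to create it, so the autobatcher closes the batch right after that task and lets the erroneous batch fail on its own before any index-creating task runs. A sketch of such a call in the style of this test module (same helpers as above; no exact snapshot asserted, the expected shape is only indicated in the comment):
// Index absent (first argument `false`) and the head task cannot create it:
// only task 0 ends up in the batch, the index-creating task is left for the next tick.
let _head_only = autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), idx_create()]);
// expected shape: Some((<head import with allow_index_creation: false, ids: [0]>, false))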
#[test]
fn allowed_and_disallowed_index_creation() {
// `DocumentImport` tasks that may create an index can't be batched with ones that may not, except if the index already exists.
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentOperation { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))");
// batch deletion and addition
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0, 1] }, false))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((SettingsAndDocumentImport { settings_ids: [1], method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, import_ids: [0] }, false))");
}
#[test]
fn autobatch_primary_key() {
// ==> If I have a pk
// With a single update
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// With a multiple updates
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0, 1] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// ==> If I don't have a pk
// With a single update
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), import_ids: [0] }, true))"###);
// With a multiple updates
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###);
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0, 1] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, import_ids: [0] }, true))");
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentImport { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), import_ids: [0] }, true))"###);
}
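One invariant holds across all of the primary-key cases above, on both sides of the rename: a task that announces a primary key different from the one already set on the index is never merged into the running batch; it either starts a batch of its own or ends the current one. A standalone sketch of that check (illustration only, not the autobatcher's real code):
#[test]
fn primary_key_conflict_sketch() {
    // Illustration only: a task whose explicit primary key differs from the index's
    // primary key can never extend the current batch.
    fn conflicts_with_index(index_pk: Option<&str>, task_pk: Option<&str>) -> bool {
        match (index_pk, task_pk) {
            (Some(index_pk), Some(task_pk)) => index_pk != task_pk,
            _ => false,
        }
    }
    assert!(conflicts_with_index(Some("id"), Some("other"))); // ends the batch, as in the cases above
    assert!(!conflicts_with_index(Some("id"), Some("id")));
    assert!(!conflicts_with_index(Some("id"), None));
}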
}

View File

@@ -28,7 +28,8 @@ use meilisearch_types::heed::{RoTxn, RwTxn};
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
use meilisearch_types::milli::heed::CompactionOption;
use meilisearch_types::milli::update::{
DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod, Settings as MilliSettings,
DocumentAdditionResult, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
Settings as MilliSettings,
};
use meilisearch_types::milli::{self, BEU32};
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@@ -85,21 +86,15 @@ pub(crate) enum Batch {
},
}
#[derive(Debug)]
pub(crate) enum DocumentOperation {
Add(Uuid),
Delete(Vec<String>),
}
/// A [batch](Batch) that combines multiple tasks operating on an index.
#[derive(Debug)]
pub(crate) enum IndexOperation {
DocumentOperation {
DocumentImport {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
content_files: Vec<Uuid>,
tasks: Vec<Task>,
},
DocumentDeletion {
@@ -126,13 +121,13 @@ pub(crate) enum IndexOperation {
settings: Vec<(bool, Settings<Unchecked>)>,
settings_tasks: Vec<Task>,
},
SettingsAndDocumentOperation {
SettingsAndDocumentImport {
index_uid: String,
primary_key: Option<String>,
method: IndexDocumentsMethod,
documents_counts: Vec<u64>,
operations: Vec<DocumentOperation>,
content_files: Vec<Uuid>,
document_import_tasks: Vec<Task>,
// The boolean indicates if it's a settings deletion or creation.
@@ -154,13 +149,13 @@ impl Batch {
tasks.iter().map(|task| task.uid).collect()
}
Batch::IndexOperation { op, .. } => match op {
IndexOperation::DocumentOperation { tasks, .. }
IndexOperation::DocumentImport { tasks, .. }
| IndexOperation::DocumentDeletion { tasks, .. }
| IndexOperation::Settings { tasks, .. }
| IndexOperation::DocumentClear { tasks, .. } => {
tasks.iter().map(|task| task.uid).collect()
}
IndexOperation::SettingsAndDocumentOperation {
IndexOperation::SettingsAndDocumentImport {
document_import_tasks: tasks,
settings_tasks: other,
..
@@ -174,33 +169,17 @@ impl Batch {
Batch::IndexSwap { task } => vec![task.uid],
}
}
/// Return the index UID associated with this batch
pub fn index_uid(&self) -> Option<&str> {
use Batch::*;
match self {
TaskCancelation { .. }
| TaskDeletion(_)
| SnapshotCreation(_)
| Dump(_)
| IndexSwap { .. } => None,
IndexOperation { op, .. } => Some(op.index_uid()),
IndexCreation { index_uid, .. }
| IndexUpdate { index_uid, .. }
| IndexDeletion { index_uid, .. } => Some(index_uid),
}
}
}
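`index_uid` exists so the scheduler can remember, before processing starts, which index a batch targets; later in this diff the `tick` function uses it to map a `MaxDatabaseSizeReached` failure back to that index and resize it. A condensed sketch of that caller, reusing names from the `tick` hunk shown further down (`process_batch` stands in for the actual processing call):
// Condensed from the tick() logic later in this diff; illustration only.
let index_uid = batch.index_uid().map(ToOwned::to_owned);
match self.process_batch(batch) {
    // the index ran out of map space: resize it and retry the same tasks on the next tick
    Err(Error::Milli(milli::Error::UserError(milli::UserError::MaxDatabaseSizeReached)))
        if index_uid.is_some() =>
    {
        self.index_mapper.resize_index(&wtxn, &index_uid.unwrap())?;
    }
    _ => { /* success and other errors keep their existing handling */ }
}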
impl IndexOperation {
pub fn index_uid(&self) -> &str {
match self {
IndexOperation::DocumentOperation { index_uid, .. }
IndexOperation::DocumentImport { index_uid, .. }
| IndexOperation::DocumentDeletion { index_uid, .. }
| IndexOperation::DocumentClear { index_uid, .. }
| IndexOperation::Settings { index_uid, .. }
| IndexOperation::DocumentClearAndSetting { index_uid, .. }
| IndexOperation::SettingsAndDocumentOperation { index_uid, .. } => index_uid,
| IndexOperation::SettingsAndDocumentImport { index_uid, .. } => index_uid,
}
}
}
@@ -227,22 +206,17 @@ impl IndexScheduler {
},
must_create_index,
})),
BatchKind::DocumentOperation { method, operation_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, operation_ids)?;
let primary_key = tasks
.iter()
.find_map(|task| match task.kind {
KindWithContent::DocumentAdditionOrUpdate { ref primary_key, .. } => {
// we want to stop on the first document addition
Some(primary_key.clone())
}
KindWithContent::DocumentDeletion { .. } => None,
_ => unreachable!(),
})
.flatten();
BatchKind::DocumentImport { method, import_ids, .. } => {
let tasks = self.get_existing_tasks(rtxn, import_ids)?;
let primary_key = match &tasks[0].kind {
KindWithContent::DocumentAdditionOrUpdate { primary_key, .. } => {
primary_key.clone()
}
_ => unreachable!(),
};
let mut documents_counts = Vec::new();
let mut operations = Vec::new();
let mut content_files = Vec::new();
for task in tasks.iter() {
match task.kind {
@@ -252,23 +226,19 @@ impl IndexScheduler {
..
} => {
documents_counts.push(documents_count);
operations.push(DocumentOperation::Add(content_file));
}
KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
documents_counts.push(documents_ids.len() as u64);
operations.push(DocumentOperation::Delete(documents_ids.clone()));
content_files.push(content_file);
}
_ => unreachable!(),
}
}
Ok(Some(Batch::IndexOperation {
op: IndexOperation::DocumentOperation {
op: IndexOperation::DocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
tasks,
},
must_create_index,
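The `find_map` above walks the batched tasks in order, takes the primary key of the first document addition it meets, skips deletions, and the trailing `.flatten()` collapses the resulting `Option<Option<String>>`. A standalone sketch of that pattern:
// Standalone illustration of the find_map + flatten pattern used above.
enum TaskKind {
    Addition { primary_key: Option<String> },
    Deletion,
}

fn first_addition_primary_key(tasks: &[TaskKind]) -> Option<String> {
    tasks
        .iter()
        .find_map(|task| match task {
            // we want to stop on the first document addition
            TaskKind::Addition { primary_key } => Some(primary_key.clone()),
            TaskKind::Deletion => None,
        })
        .flatten()
}

fn main() {
    let tasks = vec![TaskKind::Deletion, TaskKind::Addition { primary_key: Some("id".into()) }];
    assert_eq!(first_addition_primary_key(&tasks), Some("id".to_string()));
}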
@@ -352,12 +322,12 @@ impl IndexScheduler {
must_create_index,
}))
}
BatchKind::SettingsAndDocumentOperation {
BatchKind::SettingsAndDocumentImport {
settings_ids,
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
} => {
let settings = self.create_next_batch_index(
rtxn,
@@ -369,11 +339,11 @@ impl IndexScheduler {
let document_import = self.create_next_batch_index(
rtxn,
index_uid.clone(),
BatchKind::DocumentOperation {
BatchKind::DocumentImport {
method,
allow_index_creation,
primary_key,
operation_ids,
import_ids,
},
must_create_index,
)?;
@@ -382,10 +352,10 @@ impl IndexScheduler {
(
Some(Batch::IndexOperation {
op:
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
primary_key,
documents_counts,
operations,
content_files,
tasks: document_import_tasks,
..
},
@@ -396,12 +366,12 @@ impl IndexScheduler {
..
}),
) => Ok(Some(Batch::IndexOperation {
op: IndexOperation::SettingsAndDocumentOperation {
op: IndexOperation::SettingsAndDocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
document_import_tasks,
settings,
settings_tasks,
@@ -1017,12 +987,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
index_uid: _,
primary_key,
method,
documents_counts: _,
operations,
documents_counts,
content_files,
mut tasks,
} => {
let mut primary_key_has_been_set = false;
@@ -1067,82 +1037,26 @@ impl IndexScheduler {
|| must_stop_processing.get(),
)?;
for (operation, task) in operations.into_iter().zip(tasks.iter_mut()) {
match operation {
DocumentOperation::Add(content_uuid) => {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let mut results = Vec::new();
for content_uuid in content_files.into_iter() {
let content_file = self.file_store.get_update(content_uuid)?;
let reader = DocumentsBatchReader::from_reader(content_file)
.map_err(milli::Error::from)?;
let (new_builder, user_result) = builder.add_documents(reader)?;
builder = new_builder;
let received_documents =
if let Some(Details::DocumentAdditionOrUpdate {
received_documents,
..
}) = task.details
{
received_documents
} else {
// In the case of a `documentAdditionOrUpdate` the details MUST be set
unreachable!();
};
let user_result = match user_result {
Ok(count) => Ok(DocumentAdditionResult {
indexed_documents: count,
number_of_documents: count, // TODO: this is wrong, we should use the value stored in the Details.
}),
Err(e) => Err(milli::Error::from(e)),
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(count),
})
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents,
indexed_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
DocumentOperation::Delete(document_ids) => {
let (new_builder, user_result) =
builder.remove_documents(document_ids)?;
builder = new_builder;
let provided_ids =
if let Some(Details::DocumentDeletion { provided_ids, .. }) =
task.details
{
provided_ids
} else {
// In the case of a `documentDeletion` the details MUST be set
unreachable!();
};
match user_result {
Ok(count) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(count),
});
}
Err(e) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentDeletion {
provided_ids,
deleted_documents: Some(0),
});
task.error = Some(milli::Error::from(e).into());
}
}
}
}
results.push(user_result);
}
if !tasks.iter().all(|res| res.error.is_some()) {
if results.iter().any(|res| res.is_ok()) {
let addition = builder.execute()?;
info!("document addition done: {:?}", addition);
} else if primary_key_has_been_set {
@@ -1157,6 +1071,29 @@ impl IndexScheduler {
)?;
}
for (task, (ret, count)) in
tasks.iter_mut().zip(results.into_iter().zip(documents_counts))
{
match ret {
Ok(DocumentAdditionResult { indexed_documents, number_of_documents }) => {
task.status = Status::Succeeded;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: number_of_documents,
indexed_documents: Some(indexed_documents),
});
}
Err(error) => {
task.status = Status::Failed;
task.details = Some(Details::DocumentAdditionOrUpdate {
received_documents: count,
// if there was an error we indexed 0 documents.
indexed_documents: Some(0),
});
task.error = Some(error.into())
}
}
}
Ok(tasks)
}
IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
@@ -1199,12 +1136,12 @@ impl IndexScheduler {
Ok(tasks)
}
IndexOperation::SettingsAndDocumentOperation {
IndexOperation::SettingsAndDocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
document_import_tasks,
settings,
settings_tasks,
@@ -1222,12 +1159,12 @@ impl IndexScheduler {
let mut import_tasks = self.apply_index_operation(
index_wtxn,
index,
IndexOperation::DocumentOperation {
IndexOperation::DocumentImport {
index_uid,
primary_key,
method,
documents_counts,
operations,
content_files,
tasks: document_import_tasks,
},
)?;

View File

@@ -9,11 +9,10 @@ use meilisearch_types::heed::types::Str;
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::Index;
use synchronoise::SignalEvent;
use time::OffsetDateTime;
use uuid::Uuid;
use self::IndexStatus::{Available, BeingDeleted, BeingResized};
use self::IndexStatus::{Available, BeingDeleted};
use crate::uuid_codec::UuidCodec;
use crate::{clamp_to_page_size, Error, Result};
@@ -46,8 +45,6 @@ pub struct IndexMapper {
pub enum IndexStatus {
/// Do not insert it back in the index map as it is currently being deleted.
BeingDeleted,
/// Temporarily do not insert the index in the index map as it is currently being resized.
BeingResized(Arc<SignalEvent>),
/// You can use the index without worrying about anything.
Available(Index),
}
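`BeingResized` carries a `SignalEvent` (from the `synchronoise` import above) so that any reader finding an index in that state can wait for the resize to complete and then retry, instead of failing. A minimal sketch of that life cycle, assuming only the `synchronoise` crate:
use std::sync::Arc;
use synchronoise::SignalEvent;

fn main() {
    // The resizer parks the event in the map as BeingResized(event), performs the resize,
    // puts Available(index) back, and only then signals.
    let resize_operation = Arc::new(SignalEvent::manual(false)); // starts unsignaled
    // A reader that sees BeingResized clones the Arc, drops its lock, then blocks:
    // resize_operation.wait();
    resize_operation.signal(); // wakes every reader blocked on wait(); they retry the lookup
}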
@@ -74,10 +71,9 @@ impl IndexMapper {
&self,
path: &Path,
date: Option<(OffsetDateTime, OffsetDateTime)>,
map_size: usize,
) -> Result<Index> {
let mut options = EnvOpenOptions::new();
options.map_size(clamp_to_page_size(map_size));
options.map_size(clamp_to_page_size(self.index_size));
options.max_readers(1024);
if let Some((created, updated)) = date {
@@ -106,15 +102,14 @@ impl IndexMapper {
let index_path = self.base_path.join(uuid.to_string());
fs::create_dir_all(&index_path)?;
let index = self.create_or_open_index(&index_path, date, self.index_size)?;
let index = self.create_or_open_index(&index_path, date)?;
wtxn.commit()?;
// Error if the UUIDv4 somehow already exists in the map, since it should be fresh.
// This is very unlikely to happen in practice.
// TODO: it would be better to lazily create the index. But we need an Index::open function for milli.
if self.index_map.write().unwrap().insert(uuid, Available(index.clone())).is_some()
if let Some(BeingDeleted) =
self.index_map.write().unwrap().insert(uuid, Available(index.clone()))
{
panic!("Uuid v4 conflict: index with UUID {uuid} already exists.");
panic!("Uuid v4 conflict.");
}
Ok(index)
@@ -136,24 +131,14 @@ impl IndexMapper {
wtxn.commit()?;
// We remove the index from the in-memory index map.
let closing_event = loop {
let mut lock = self.index_map.write().unwrap();
let resize_operation = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => break Some(index.prepare_for_closing()),
// The target index is in the middle of a resize operation.
// Wait for this operation to complete, then try again.
Some(BeingResized(resize_operation)) => resize_operation.clone(),
// The index is already being deleted or doesn't exist.
// It's OK to remove it from the map again.
_ => break None,
};
// Avoiding deadlocks: we need to drop the lock before waiting for the end of the resize, which
// will involve operations on the very map we're locking.
drop(lock);
resize_operation.wait();
let mut lock = self.index_map.write().unwrap();
let closing_event = match lock.insert(uuid, BeingDeleted) {
Some(Available(index)) => Some(index.prepare_for_closing()),
_ => None,
};
drop(lock);
let index_map = self.index_map.clone();
let index_path = self.base_path.join(uuid.to_string());
let index_name = name.to_string();
@@ -186,87 +171,6 @@ impl IndexMapper {
Ok(self.index_mapping.get(rtxn, name)?.is_some())
}
/// Resizes the maximum size of the specified index to the double of its current maximum size.
///
/// This operation involves closing the underlying environment and so can take a long time to complete.
///
/// # Panics
///
/// - If the Index corresponding to the passed name is concurrently being deleted/resized or cannot be found in the
/// in memory hash map.
pub fn resize_index(&self, rtxn: &RoTxn, name: &str) -> Result<()> {
// fixme: factor to a function?
let uuid = self
.index_mapping
.get(rtxn, name)?
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// We remove the index from the in-memory index map.
let mut lock = self.index_map.write().unwrap();
// signal that will be sent when the resize operation completes
let resize_operation = Arc::new(SignalEvent::manual(false));
let index = match lock.insert(uuid, BeingResized(resize_operation)) {
Some(Available(index)) => index,
Some(previous_status) => {
lock.insert(uuid, previous_status);
panic!(
"Attempting to resize index {name} that is already being resized or deleted."
)
}
None => {
panic!("Could not find the status of index {name} in the in-memory index mapper.")
}
};
drop(lock);
let resize_succeeded = (move || {
let current_size = index.map_size()?;
let new_size = current_size * 2;
let closing_event = index.prepare_for_closing();
log::debug!("Waiting for index {name} to close");
if !closing_event.wait_timeout(std::time::Duration::from_secs(600)) {
// fail after 10 minutes waiting
panic!("Could not resize index {name} (unable to close it)");
}
log::info!("Resized index {name} from {current_size} to {new_size} bytes");
let index_path = self.base_path.join(uuid.to_string());
let index = self.create_or_open_index(&index_path, None, new_size)?;
Ok(index)
})();
// Put the map back to a consistent state.
// Even if there was an error we don't want to leave the map in an inconsistent state as it would cause
// deadlocks.
let mut lock = self.index_map.write().unwrap();
let (resize_operation, resize_succeeded) = match resize_succeeded {
Ok(index) => {
// insert the resized index
let Some(BeingResized(resize_operation)) = lock.insert(uuid, Available(index)) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Ok(()))
}
Err(error) => {
// there was an error, not much we can do... delete the index from the in-memory map to prevent future errors
let Some(BeingResized(resize_operation)) = lock.remove(&uuid) else {
panic!("Index state for index {name} was modified while it was being resized")
};
(resize_operation, Err(error))
}
};
// drop the lock before signaling completion so that other threads don't immediately await on the lock after waking up.
drop(lock);
resize_operation.signal();
resize_succeeded
}
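The resize itself is close, double, reopen: read the current map size, multiply it by two, and reopen the environment at the same path with the new size, which is why repeated resizes grow an index geometrically. A tiny worked example of the arithmetic (the 256 MiB starting point is only an example, not the configured default):
fn main() {
    // e.g. an index currently mapped at 256 MiB is reopened at 512 MiB, then 1 GiB, ...
    let current_size: usize = 256 * 1024 * 1024;
    let new_size = current_size * 2;
    assert_eq!(new_size, 512 * 1024 * 1024);
}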
/// Return an index, may open it if it wasn't already opened.
pub fn index(&self, rtxn: &RoTxn, name: &str) -> Result<Index> {
let uuid = self
@@ -275,47 +179,31 @@ impl IndexMapper {
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
// we clone here to drop the lock before entering the match
let index = loop {
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = self.index_map.read().unwrap().get(&uuid).cloned();
let index = match index {
Some(Available(index)) => index,
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (eg if two search happens
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
match index {
Some(Available(index)) => break index,
Some(BeingResized(ref resize_operation)) => {
// Avoiding deadlocks: no lock taken while doing this operation.
resize_operation.wait();
continue;
}
Some(BeingDeleted) => return Err(Error::IndexNotFound(name.to_string())),
// since we're lazy, it's possible that the index has not been opened yet.
None => {
let mut index_map = self.index_map.write().unwrap();
// between the read lock and the write lock it's not impossible
// that someone already opened the index (eg if two search happens
// at the same time), thus before opening it we check a second time
// if it's not already there.
// Since there is a good chance it's not already there we can use
// the entry method.
match index_map.entry(uuid) {
Entry::Vacant(entry) => {
let index_path = self.base_path.join(uuid.to_string());
let index =
self.create_or_open_index(&index_path, None, self.index_size)?;
entry.insert(Available(index.clone()));
break index;
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => break index.clone(),
BeingResized(resize_operation) => {
// Avoiding the deadlock: we drop the lock before waiting
let resize_operation = resize_operation.clone();
drop(index_map);
resize_operation.wait();
continue;
}
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
let index = self.create_or_open_index(&index_path, None)?;
entry.insert(Available(index.clone()));
index
}
Entry::Occupied(entry) => match entry.get() {
Available(index) => index.clone(),
BeingDeleted => return Err(Error::IndexNotFound(name.to_string())),
},
}
}
};
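The lookup above is a double-checked pattern: try the cheap read lock first, and only if the index is absent take the write lock and re-check through the `entry` API before opening, because another thread may have opened it between the two locks. A standalone sketch of that pattern outside of any Meilisearch type:
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::RwLock;

// Standalone sketch of the double-checked pattern described in the comments above:
// optimistic read lock first, then re-check under the write lock before creating.
fn get_or_create(map: &RwLock<HashMap<u32, String>>, key: u32) -> String {
    if let Some(value) = map.read().unwrap().get(&key).cloned() {
        return value; // fast path: already opened
    }
    let mut map = map.write().unwrap();
    match map.entry(key) {
        // nobody opened it between the two locks: create it now
        Entry::Vacant(entry) => entry.insert(format!("index-{key}")).clone(),
        // someone else beat us to it: reuse their value
        Entry::Occupied(entry) => entry.get().clone(),
    }
}

fn main() {
    let map = RwLock::new(HashMap::new());
    assert_eq!(get_or_create(&map, 7), "index-7");
    assert_eq!(get_or_create(&map, 7), "index-7");
}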

View File

@@ -43,7 +43,6 @@ use file_store::FileStore;
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
use meilisearch_types::heed::{self, Database, Env, RoTxn};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::milli;
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::update::IndexerConfig;
@@ -423,12 +422,12 @@ impl IndexScheduler {
#[cfg(test)]
run.breakpoint(Breakpoint::Init);
run.wake_up.wait();
loop {
run.wake_up.wait();
match run.tick() {
Ok(TickOutcome::TickAgain(_)) => (),
Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(),
Ok(0) => (),
Ok(_) => run.wake_up.signal(),
Err(e) => {
log::error!("{}", e);
// Wait one second when an irrecoverable error occurs.
@@ -441,6 +440,7 @@ impl IndexScheduler {
) {
std::thread::sleep(Duration::from_secs(1));
}
run.wake_up.signal();
}
}
}
@@ -630,7 +630,7 @@ impl IndexScheduler {
&self,
rtxn: &RoTxn,
query: &Query,
authorized_indexes: &Option<Vec<IndexUidPattern>>,
authorized_indexes: &Option<Vec<String>>,
) -> Result<RoaringBitmap> {
let mut tasks = self.get_task_ids(rtxn, query)?;
@@ -648,7 +648,7 @@ impl IndexScheduler {
let all_indexes_iter = self.index_tasks.iter(rtxn)?;
for result in all_indexes_iter {
let (index, index_tasks) = result?;
if !authorized_indexes.iter().any(|p| p.matches_str(index)) {
if !authorized_indexes.contains(&index.to_owned()) {
tasks -= index_tasks;
}
}
@@ -668,7 +668,7 @@ impl IndexScheduler {
pub fn get_tasks_from_authorized_indexes(
&self,
query: Query,
authorized_indexes: Option<Vec<IndexUidPattern>>,
authorized_indexes: Option<Vec<String>>,
) -> Result<Vec<Task>> {
let rtxn = self.env.read_txn()?;
@@ -764,8 +764,8 @@ impl IndexScheduler {
Ok(task)
}
/// Register a new task coming from a dump in the scheduler.
/// By taking a mutable ref we're pretty sure no one will ever import a dump while actix is running.
/// Register a new task comming from a dump in the scheduler.
/// By takinig a mutable ref we're pretty sure no one will ever import a dump while actix is running.
pub fn register_dumped_task(
&mut self,
task: TaskDump,
@@ -926,7 +926,7 @@ impl IndexScheduler {
/// 5. Reset the in-memory list of processed tasks.
///
/// Returns the number of processed tasks.
fn tick(&self) -> Result<TickOutcome> {
fn tick(&self) -> Result<usize> {
#[cfg(test)]
{
*self.run_loop_iteration.write().unwrap() += 1;
@@ -937,9 +937,8 @@ impl IndexScheduler {
let batch =
match self.create_next_batch(&rtxn).map_err(|e| Error::CreateBatch(Box::new(e)))? {
Some(batch) => batch,
None => return Ok(TickOutcome::WaitForSignal),
None => return Ok(0),
};
let index_uid = batch.index_uid().map(ToOwned::to_owned);
drop(rtxn);
// 1. store the starting date with the bitmap of processing tasks.
@@ -1010,23 +1009,7 @@ impl IndexScheduler {
// the `started_at` date times and `processings` of the current processing tasks.
// This date time is used by the task cancelation to store the right `started_at`
// date in the task on disk.
return Ok(TickOutcome::TickAgain(0));
}
// If an index said it was full, we need to:
// 1. identify which index is full
// 2. close the associated environment
// 3. resize it
// 4. re-schedule tasks
Err(Error::Milli(milli::Error::UserError(
milli::UserError::MaxDatabaseSizeReached,
))) if index_uid.is_some() => {
// fixme: add index_uid to match to avoid the unwrap
let index_uid = index_uid.unwrap();
// fixme: handle error more gracefully? not sure when this could happen
self.index_mapper.resize_index(&wtxn, &index_uid)?;
wtxn.abort().map_err(Error::HeedTransaction)?;
return Ok(TickOutcome::TickAgain(0));
return Ok(0);
}
// In case of a failure we must get back and patch all the tasks with the error.
Err(err) => {
@@ -1066,7 +1049,7 @@ impl IndexScheduler {
#[cfg(test)]
self.breakpoint(Breakpoint::AfterProcessing);
Ok(TickOutcome::TickAgain(processed_tasks))
Ok(processed_tasks)
}
pub(crate) fn delete_persisted_task_data(&self, task: &Task) -> Result<()> {
@@ -1101,16 +1084,6 @@ impl IndexScheduler {
}
}
/// The outcome of calling the [`IndexScheduler::tick`] function.
pub enum TickOutcome {
/// The scheduler should immediately attempt another `tick`.
///
/// The `usize` field contains the number of processed tasks.
TickAgain(usize),
/// The scheduler should wait for an external signal before attempting another `tick`.
WaitForSignal,
}
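The run loop at the top of this file is the only consumer of `TickOutcome`: `TickAgain` loops straight into another tick (the carried count is currently only informational), while `WaitForSignal` parks the thread on the scheduler's wake-up signal until a new task is registered. Condensed, the consumer looks like this (names taken from that loop; error handling elided):
loop {
    match run.tick() {
        Ok(TickOutcome::TickAgain(_processed)) => (), // more work may already be enqueued
        Ok(TickOutcome::WaitForSignal) => run.wake_up.wait(), // park until a task registration signals us
        Err(e) => log::error!("{}", e), // see the real loop above for the full error handling
    }
}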
#[cfg(test)]
mod tests {
use std::io::{BufWriter, Seek, Write};
@@ -1706,105 +1679,6 @@ mod tests {
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "both_task_succeeded");
}
#[test]
fn document_addition_and_document_deletion() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
handle.advance_one_successful_batch(); // The addition AND deletion should've been batched together
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_processing_the_batch");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn document_deletion_and_document_addition() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
index_scheduler
.register(KindWithContent::DocumentDeletion {
index_uid: S("doggos"),
documents_ids: vec![S("1"), S("2")],
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_first_task");
let content = r#"[
{ "id": 1, "doggo": "jean bob" },
{ "id": 2, "catto": "jorts" },
{ "id": 3, "doggo": "bork" }
]"#;
let (uuid, mut file) = index_scheduler.create_update_file_with_uuid(0).unwrap();
let documents_count = read_json(content.as_bytes(), file.as_file_mut()).unwrap();
file.persist().unwrap();
index_scheduler
.register(KindWithContent::DocumentAdditionOrUpdate {
index_uid: S("doggos"),
primary_key: Some(S("id")),
method: ReplaceDocuments,
content_file: uuid,
documents_count,
allow_index_creation: true,
})
.unwrap();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "registered_the_second_task");
// The deletion should have failed because it can't create an index
handle.advance_one_failed_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_failing_the_deletion");
// The addition should work
handle.advance_one_successful_batch();
snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_last_successful_addition");
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
let field_ids = field_ids_map.ids().collect::<Vec<_>>();
let documents = index
.all_documents(&rtxn)
.unwrap()
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents");
}
#[test]
fn do_not_batch_task_of_different_indexes() {
let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
@@ -2647,11 +2521,7 @@ mod tests {
let query = Query { index_uids: Some(vec!["catto".to_owned()]), ..Default::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_owned()]))
.unwrap();
// we have asked for only the tasks associated with catto, but are only authorized to retrieve the tasks
// associated with doggo -> empty result
@@ -2659,11 +2529,7 @@ mod tests {
let query = Query::default();
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_owned()]))
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo tasks
// -> only the index creation of doggo should be returned
@@ -2674,10 +2540,7 @@ mod tests {
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![
IndexUidPattern::new_unchecked("catto"),
IndexUidPattern::new_unchecked("doggo"),
]),
&Some(vec!["catto".to_owned(), "doggo".to_owned()]),
)
.unwrap();
// we asked for all the tasks, but we are only authorized to retrieve the doggo and catto tasks
@@ -2725,11 +2588,7 @@ mod tests {
let query = Query { canceled_by: Some(vec![task_cancelation.uid]), ..Query::default() };
let tasks = index_scheduler
.get_task_ids_from_authorized_indexes(
&rtxn,
&query,
&Some(vec![IndexUidPattern::new_unchecked("doggo")]),
)
.get_task_ids_from_authorized_indexes(&rtxn, &query, &Some(vec!["doggo".to_string()]))
.unwrap();
// Return only 1 because the user is not authorized to see task 2
snapshot!(snapshot_bitmap(&tasks), @"[1,]");
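These assertions rely on `IndexUidPattern` (also used in the `get_task_ids_from_authorized_indexes` hunk above): a task on a given index stays visible only if one of the authorized patterns matches it through `matches_str`. A minimal sketch of that check, assuming the `meilisearch_types` crate imported earlier in this file:
use meilisearch_types::index_uid_pattern::IndexUidPattern;

fn main() {
    // Illustration only: a key restricted to the `doggo` pattern sees doggo's tasks but not catto's,
    // which is why the query above comes back with a single task.
    let authorized = vec![IndexUidPattern::new_unchecked("doggo")];
    assert!(authorized.iter().any(|p| p.matches_str("doggo")));
    assert!(!authorized.iter().any(|p| p.matches_str("catto")));
}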

View File

@@ -1,42 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: succeeded, details: { received_document_ids: 2, deleted_documents: Some(2) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,9 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 3,
"doggo": "bork"
}
]

View File

@@ -1,37 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [0,]
"documentDeletion" [1,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -1,43 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -1,45 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: failed, error: ResponseError { code: 200, message: "Index `doggos` not found.", error_code: "index_not_found", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_not_found" }, details: { received_document_ids: 2, deleted_documents: Some(0) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued []
succeeded [1,]
failed [0,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
["doggos"]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Finished At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,17 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
[
{
"id": 1,
"doggo": "jean bob"
},
{
"id": 2,
"catto": "jorts"
},
{
"id": 3,
"doggo": "bork"
}
]

View File

@@ -1,36 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
----------------------------------------------------------------------
### Status:
enqueued [0,]
----------------------------------------------------------------------
### Kind:
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
----------------------------------------------------------------------

View File

@@ -1,40 +0,0 @@
---
source: index-scheduler/src/lib.rs
---
### Autobatching Enabled = true
### Processing Tasks:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { received_document_ids: 2, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1", "2"] }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:
enqueued [0,1,]
----------------------------------------------------------------------
### Kind:
"documentAdditionOrUpdate" [1,]
"documentDeletion" [0,]
----------------------------------------------------------------------
### Index Tasks:
doggos [0,1,]
----------------------------------------------------------------------
### Index Mapper:
[]
----------------------------------------------------------------------
### Canceled By:
----------------------------------------------------------------------
### Enqueued At:
[timestamp] [0,]
[timestamp] [1,]
----------------------------------------------------------------------
### Started At:
----------------------------------------------------------------------
### Finished At:
----------------------------------------------------------------------
### File Store:
00000000-0000-0000-0000-000000000000
----------------------------------------------------------------------

View File

@@ -439,29 +439,20 @@ impl IndexScheduler {
provided_ids: received_document_ids,
deleted_documents,
} => {
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
let (index_uid, documents_ids) =
if let KindWithContent::DocumentDeletion {
ref index_uid,
ref documents_ids,
} = kind
{
(index_uid, documents_ids)
} else {
unreachable!()
};
assert_eq!(&task_index_uid.unwrap(), index_uid);
if let Some(deleted_documents) = deleted_documents {
assert_eq!(status, Status::Succeeded);
assert!(deleted_documents <= received_document_ids as u64);
assert_eq!(kind.as_kind(), Kind::DocumentDeletion);
match status {
Status::Enqueued | Status::Processing => (),
Status::Succeeded => {
assert!(deleted_documents.unwrap() <= received_document_ids as u64);
assert!(documents_ids.len() == received_document_ids);
}
Status::Failed | Status::Canceled => {
assert!(deleted_documents == Some(0));
assert!(documents_ids.len() == received_document_ids);
match &kind {
KindWithContent::DocumentDeletion { index_uid, documents_ids } => {
assert_eq!(&task_index_uid.unwrap(), index_uid);
assert!(documents_ids.len() >= received_document_ids);
}
_ => panic!(),
}
} else {
assert_ne!(status, Status::Succeeded);
}
}
Details::ClearAll { deleted_documents } => {

View File

@@ -1,16 +1,10 @@
[package]
name = "json-depth-checker"
version = "1.0.0"
edition = "2021"
description = "A library that indicates if a JSON must be flattened"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde_json = "1.0"

View File

@@ -1,14 +1,7 @@
[package]
name = "meili-snap"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
version = "1.0.0"
edition = "2021"
[dependencies]
insta = { version = "^1.19.1", features = ["json", "redactions"] }

View File

@@ -1,20 +1,12 @@
[package]
name = "meilisearch-auth"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
version = "1.0.0"
edition = "2021"
[dependencies]
base64 = "0.13.1"
enum-iterator = "1.1.3"
hmac = "0.12.1"
maplit = "1.0.2"
meilisearch-types = { path = "../meilisearch-types" }
rand = "0.8.5"
roaring = { version = "0.10.0", features = ["serde"] }

View File

@@ -7,10 +7,9 @@ use std::path::Path;
use std::sync::Arc;
use error::{AuthControllerError, Result};
use maplit::hashset;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, CreateApiKey, Key, PatchApiKey};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::star_or::StarOr;
use serde::{Deserialize, Serialize};
pub use store::open_auth_store_env;
use store::{generate_key_as_hexa, HeedAuthStore};
@@ -86,12 +85,29 @@ impl AuthController {
search_rules: Option<SearchRules>,
) -> Result<AuthFilter> {
let mut filters = AuthFilter::default();
let key = self.get_key(uid)?;
let key = self
.store
.get_api_key(uid)?
.ok_or_else(|| AuthControllerError::ApiKeyNotFound(uid.to_string()))?;
filters.search_rules = match search_rules {
Some(search_rules) => search_rules,
None => SearchRules::Set(key.indexes.into_iter().collect()),
};
if !key.indexes.iter().any(|i| i == &StarOr::Star) {
filters.search_rules = match search_rules {
// Intersect search_rules with parent key authorized indexes.
Some(search_rules) => SearchRules::Map(
key.indexes
.into_iter()
.filter_map(|index| {
search_rules.get_index_search_rules(&format!("{index}")).map(
|index_search_rules| (index.to_string(), Some(index_search_rules)),
)
})
.collect(),
),
None => SearchRules::Set(key.indexes.into_iter().map(|x| x.to_string()).collect()),
};
} else if let Some(search_rules) = search_rules {
filters.search_rules = search_rules;
}
filters.allow_index_creation = self.is_key_authorized(uid, Action::IndexesAdd, None)?;
@@ -134,7 +150,9 @@ impl AuthController {
.get_expiration_date(uid, action, None)?
.or(match index {
// else check if the key has access to the requested index.
Some(index) => self.store.get_expiration_date(uid, action, Some(index))?,
Some(index) => {
self.store.get_expiration_date(uid, action, Some(index.as_bytes()))?
}
// or to any index if no index has been requested.
None => self.store.prefix_first_expiration_date(uid, action)?,
}) {
@@ -174,54 +192,42 @@ impl Default for AuthFilter {
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(untagged)]
pub enum SearchRules {
Set(HashSet<IndexUidPattern>),
Map(HashMap<IndexUidPattern, Option<IndexSearchRules>>),
Set(HashSet<String>),
Map(HashMap<String, Option<IndexSearchRules>>),
}
impl Default for SearchRules {
fn default() -> Self {
Self::Set(hashset! { IndexUidPattern::all() })
Self::Set(Some("*".to_string()).into_iter().collect())
}
}
impl SearchRules {
pub fn is_index_authorized(&self, index: &str) -> bool {
match self {
Self::Set(set) => {
set.contains("*")
|| set.contains(index)
|| set.iter().any(|pattern| pattern.matches_str(index))
}
Self::Map(map) => {
map.contains_key("*")
|| map.contains_key(index)
|| map.keys().any(|pattern| pattern.matches_str(index))
}
Self::Set(set) => set.contains("*") || set.contains(index),
Self::Map(map) => map.contains_key("*") || map.contains_key(index),
}
}
pub fn get_index_search_rules(&self, index: &str) -> Option<IndexSearchRules> {
match self {
Self::Set(_) => {
if self.is_index_authorized(index) {
Self::Set(set) => {
if set.contains("*") || set.contains(index) {
Some(IndexSearchRules::default())
} else {
None
}
}
Self::Map(map) => {
// We must take the most restrictive rule among the index uid patterns that match this index.
map.iter()
.filter(|(pattern, _)| pattern.matches_str(index))
.max_by_key(|(pattern, _)| (pattern.is_exact(), pattern.len()))
.and_then(|(_, rule)| rule.clone())
map.get(index).or_else(|| map.get("*")).map(|isr| isr.clone().unwrap_or_default())
}
}
}
/// Return the list of indexes such that `self.is_index_authorized(index) == true`,
/// or `None` if all indexes satisfy this condition.
pub fn authorized_indexes(&self) -> Option<Vec<IndexUidPattern>> {
pub fn authorized_indexes(&self) -> Option<Vec<String>> {
match self {
SearchRules::Set(set) => {
if set.contains("*") {
@@ -242,7 +248,7 @@ impl SearchRules {
}
impl IntoIterator for SearchRules {
type Item = (IndexUidPattern, IndexSearchRules);
type Item = (String, IndexSearchRules);
type IntoIter = Box<dyn Iterator<Item = Self::Item>>;
fn into_iter(self) -> Self::IntoIter {

View File

@@ -5,21 +5,20 @@ use std::convert::{TryFrom, TryInto};
use std::fs::create_dir_all;
use std::path::Path;
use std::str;
use std::str::FromStr;
use std::sync::Arc;
use hmac::{Hmac, Mac};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::KeyId;
use meilisearch_types::milli;
use meilisearch_types::milli::heed::types::{ByteSlice, DecodeIgnore, SerdeJson};
use meilisearch_types::milli::heed::{Database, Env, EnvOpenOptions, RwTxn};
use meilisearch_types::star_or::StarOr;
use sha2::Sha256;
use time::OffsetDateTime;
use uuid::fmt::Hyphenated;
use uuid::Uuid;
use super::error::{AuthControllerError, Result};
use super::error::Result;
use super::{Action, Key};
const AUTH_STORE_SIZE: usize = 1_073_741_824; //1GiB
@@ -130,7 +129,7 @@ impl HeedAuthStore {
}
}
let no_index_restriction = key.indexes.iter().any(|p| p.matches_all());
let no_index_restriction = key.indexes.contains(&StarOr::Star);
for action in actions {
if no_index_restriction {
// If there is no index restriction we put None.
@@ -215,28 +214,11 @@ impl HeedAuthStore {
&self,
uid: Uuid,
action: Action,
index: Option<&str>,
index: Option<&[u8]>,
) -> Result<Option<Option<OffsetDateTime>>> {
let rtxn = self.env.read_txn()?;
let tuple = (&uid, &action, index.map(|s| s.as_bytes()));
match self.action_keyid_index_expiration.get(&rtxn, &tuple)? {
Some(expiration) => Ok(Some(expiration)),
None => {
let tuple = (&uid, &action, None);
for result in self.action_keyid_index_expiration.prefix_iter(&rtxn, &tuple)? {
let ((_, _, index_uid_pattern), expiration) = result?;
if let Some((pattern, index)) = index_uid_pattern.zip(index) {
let index_uid_pattern = str::from_utf8(pattern)?;
let pattern = IndexUidPattern::from_str(index_uid_pattern)
.map_err(|e| AuthControllerError::Internal(Box::new(e)))?;
if pattern.matches_str(index) {
return Ok(Some(expiration));
}
}
}
Ok(None)
}
}
let tuple = (&uid, &action, index);
Ok(self.action_keyid_index_expiration.get(&rtxn, &tuple)?)
}
pub fn prefix_first_expiration_date(

View File

@@ -1,21 +1,15 @@
[package]
name = "meilisearch-types"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
version = "1.0.0"
authors = ["marin <postma.marin@protonmail.com>"]
edition = "2021"
[dependencies]
actix-web = { version = "4.2.1", default-features = false }
anyhow = "1.0.65"
convert_case = "0.6.0"
csv = "1.1.6"
deserr = "0.5.0"
deserr = "0.3.0"
either = { version = "1.6.1", features = ["serde"] }
enum-iterator = "1.1.3"
file-store = { path = "../file-store" }

View File

@@ -0,0 +1,328 @@
/*!
This module implements the error messages of deserialization errors.
We try to:
1. Give a human-readable description of where the error originated.
2. Use the correct terms depending on the format of the request (json/query param)
3. Categorise the type of the error (e.g. missing field, wrong value type, unexpected error, etc.)
*/
use deserr::{ErrorKind, IntoValue, ValueKind, ValuePointerRef};
use super::{DeserrJsonError, DeserrQueryParamError};
use crate::error::{Code, ErrorCode};
/// Return a description of the given location in a Json, preceded by the given article.
/// e.g. `at .key1[8].key2`. If the location is the origin, the given article will not be
/// included in the description.
pub fn location_json_description(location: ValuePointerRef, article: &str) -> String {
fn rec(location: ValuePointerRef) -> String {
match location {
ValuePointerRef::Origin => String::new(),
ValuePointerRef::Key { key, prev } => rec(*prev) + "." + key,
ValuePointerRef::Index { index, prev } => format!("{}[{index}]", rec(*prev)),
}
}
match location {
ValuePointerRef::Origin => String::new(),
_ => {
format!("{article} `{}`", rec(location))
}
}
}
/// Return a description of the list of value kinds for a Json payload.
fn value_kinds_description_json(kinds: &[ValueKind]) -> String {
// Rank each value kind so that they can be sorted (and deduplicated)
// Having a predictable order helps with pattern matching
fn order(kind: &ValueKind) -> u8 {
match kind {
ValueKind::Null => 0,
ValueKind::Boolean => 1,
ValueKind::Integer => 2,
ValueKind::NegativeInteger => 3,
ValueKind::Float => 4,
ValueKind::String => 5,
ValueKind::Sequence => 6,
ValueKind::Map => 7,
}
}
// Return a description of a single value kind, preceded by an article
fn single_description(kind: &ValueKind) -> &'static str {
match kind {
ValueKind::Null => "null",
ValueKind::Boolean => "a boolean",
ValueKind::Integer => "a positive integer",
ValueKind::NegativeInteger => "a negative integer",
ValueKind::Float => "a number",
ValueKind::String => "a string",
ValueKind::Sequence => "an array",
ValueKind::Map => "an object",
}
}
fn description_rec(kinds: &[ValueKind], count_items: &mut usize, message: &mut String) {
let (msg_part, rest): (_, &[ValueKind]) = match kinds {
[] => (String::new(), &[]),
[ValueKind::Integer | ValueKind::NegativeInteger, ValueKind::Float, rest @ ..] => {
("a number".to_owned(), rest)
}
[ValueKind::Integer, ValueKind::NegativeInteger, ValueKind::Float, rest @ ..] => {
("a number".to_owned(), rest)
}
[ValueKind::Integer, ValueKind::NegativeInteger, rest @ ..] => {
("an integer".to_owned(), rest)
}
[a] => (single_description(a).to_owned(), &[]),
[a, rest @ ..] => (single_description(a).to_owned(), rest),
};
if rest.is_empty() {
if *count_items == 0 {
message.push_str(&msg_part);
} else if *count_items == 1 {
message.push_str(&format!(" or {msg_part}"));
} else {
message.push_str(&format!(", or {msg_part}"));
}
} else {
if *count_items == 0 {
message.push_str(&msg_part);
} else {
message.push_str(&format!(", {msg_part}"));
}
*count_items += 1;
description_rec(rest, count_items, message);
}
}
let mut kinds = kinds.to_owned();
kinds.sort_by_key(order);
kinds.dedup();
if kinds.is_empty() {
// Should not happen ideally
"a different value".to_owned()
} else {
let mut message = String::new();
description_rec(kinds.as_slice(), &mut 0, &mut message);
message
}
}
/// Return the JSON string of the value preceded by a description of its kind
fn value_description_with_kind_json(v: &serde_json::Value) -> String {
match v.kind() {
ValueKind::Null => "null".to_owned(),
kind => {
format!(
"{}: `{}`",
value_kinds_description_json(&[kind]),
serde_json::to_string(v).unwrap()
)
}
}
}
impl<C: Default + ErrorCode> deserr::DeserializeError for DeserrJsonError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> Result<Self, Self> {
let mut message = String::new();
message.push_str(&match error {
ErrorKind::IncorrectValueKind { actual, accepted } => {
let expected = value_kinds_description_json(accepted);
let received = value_description_with_kind_json(&serde_json::Value::from(actual));
let location = location_json_description(location, " at");
format!("Invalid value type{location}: expected {expected}, but found {received}")
}
ErrorKind::MissingField { field } => {
let location = location_json_description(location, " inside");
format!("Missing field `{field}`{location}")
}
ErrorKind::UnknownKey { key, accepted } => {
let location = location_json_description(location, " inside");
format!(
"Unknown field `{}`{location}: expected one of {}",
key,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
)
}
ErrorKind::UnknownValue { value, accepted } => {
let location = location_json_description(location, " at");
format!(
"Unknown value `{}`{location}: expected one of {}",
value,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", "),
)
}
ErrorKind::Unexpected { msg } => {
let location = location_json_description(location, " at");
format!("Invalid value{location}: {msg}")
}
});
Err(DeserrJsonError::new(message, C::default().error_code()))
}
}
pub fn immutable_field_error(field: &str, accepted: &[&str], code: Code) -> DeserrJsonError {
let msg = format!(
"Immutable field `{field}`: expected one of {}",
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
);
DeserrJsonError::new(msg, code)
}
/// Return a description of the given location in query parameters, preceded by the
/// given article. e.g. `at key5[2]`. If the location is the origin, the given article
/// will not be included in the description.
pub fn location_query_param_description(location: ValuePointerRef, article: &str) -> String {
fn rec(location: ValuePointerRef) -> String {
match location {
ValuePointerRef::Origin => String::new(),
ValuePointerRef::Key { key, prev } => {
if matches!(prev, ValuePointerRef::Origin) {
key.to_owned()
} else {
rec(*prev) + "." + key
}
}
ValuePointerRef::Index { index, prev } => format!("{}[{index}]", rec(*prev)),
}
}
match location {
ValuePointerRef::Origin => String::new(),
_ => {
format!("{article} `{}`", rec(location))
}
}
}
impl<C: Default + ErrorCode> deserr::DeserializeError for DeserrQueryParamError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> Result<Self, Self> {
let mut message = String::new();
message.push_str(&match error {
ErrorKind::IncorrectValueKind { actual, accepted } => {
let expected = value_kinds_description_query_param(accepted);
let received = value_description_with_kind_query_param(actual);
let location = location_query_param_description(location, " for parameter");
format!("Invalid value type{location}: expected {expected}, but found {received}")
}
ErrorKind::MissingField { field } => {
let location = location_query_param_description(location, " inside");
format!("Missing parameter `{field}`{location}")
}
ErrorKind::UnknownKey { key, accepted } => {
let location = location_query_param_description(location, " inside");
format!(
"Unknown parameter `{}`{location}: expected one of {}",
key,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
)
}
ErrorKind::UnknownValue { value, accepted } => {
let location = location_query_param_description(location, " for parameter");
format!(
"Unknown value `{}`{location}: expected one of {}",
value,
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", "),
)
}
ErrorKind::Unexpected { msg } => {
let location = location_query_param_description(location, " in parameter");
format!("Invalid value{location}: {msg}")
}
});
Err(DeserrQueryParamError::new(message, C::default().error_code()))
}
}
/// Return a description of the list of value kinds for query parameters
/// Since query parameters are always treated as strings, we always return
/// "a string" for now.
fn value_kinds_description_query_param(_accepted: &[ValueKind]) -> String {
"a string".to_owned()
}
fn value_description_with_kind_query_param<V: IntoValue>(actual: deserr::Value<V>) -> String {
match actual {
deserr::Value::Null => "null".to_owned(),
deserr::Value::Boolean(x) => format!("a boolean: `{x}`"),
deserr::Value::Integer(x) => format!("an integer: `{x}`"),
deserr::Value::NegativeInteger(x) => {
format!("an integer: `{x}`")
}
deserr::Value::Float(x) => {
format!("a number: `{x}`")
}
deserr::Value::String(x) => {
format!("a string: `{x}`")
}
deserr::Value::Sequence(_) => "multiple values".to_owned(),
deserr::Value::Map(_) => "multiple parameters".to_owned(),
}
}
#[cfg(test)]
mod tests {
use deserr::ValueKind;
use crate::deserr::error_messages::value_kinds_description_json;
#[test]
fn test_value_kinds_description_json() {
insta::assert_display_snapshot!(value_kinds_description_json(&[]), @"a different value");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Boolean]), @"a boolean");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer]), @"a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::NegativeInteger]), @"a negative integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer]), @"a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::String]), @"a string");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Sequence]), @"an array");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Map]), @"an object");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Boolean]), @"a boolean or a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Null, ValueKind::Integer]), @"null or a positive integer");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Sequence, ValueKind::NegativeInteger]), @"a negative integer or an array");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Float]), @"a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger]), @"a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger, ValueKind::Null]), @"null or a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Boolean, ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger, ValueKind::Null]), @"null, a boolean, or a number");
insta::assert_display_snapshot!(value_kinds_description_json(&[ValueKind::Null, ValueKind::Boolean, ValueKind::Integer, ValueKind::Float, ValueKind::NegativeInteger, ValueKind::Null]), @"null, a boolean, or a number");
}
}
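To make the formats above concrete, a worked example (the payload and field names are invented for illustration; the wording follows the `ErrorKind` arms implemented in this file): sending `{"indexes": {}}` where an array is expected renders as
Invalid value type at `.indexes`: expected an array, but found an object: `{}`
while an unknown top-level field renders, with no location suffix at the origin, as
Unknown field `foo`: expected one of `description`, `name`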

View File

@@ -1,19 +1,18 @@
use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::ops::ControlFlow;
use deserr::errors::{JsonError, QueryParamError};
use deserr::{take_cf_content, DeserializeError, IntoValue, MergeWithError, ValuePointerRef};
use deserr::{DeserializeError, MergeWithError, ValuePointerRef};
use crate::error::deserr_codes::*;
use crate::error::deserr_codes::{self, *};
use crate::error::{
Code, DeserrParseBoolError, DeserrParseIntError, ErrorCode, InvalidTaskDateError,
unwrap_any, Code, DeserrParseBoolError, DeserrParseIntError, ErrorCode, InvalidTaskDateError,
ParseOffsetDateTimeError,
};
use crate::index_uid::IndexUidFormatError;
use crate::tasks::{ParseTaskKindError, ParseTaskStatusError};
pub mod error_messages;
pub mod query_params;
/// Marker type for the Json format
@@ -21,8 +20,8 @@ pub struct DeserrJson;
/// Marker type for the Query Parameter format
pub struct DeserrQueryParam;
pub type DeserrJsonError<C = BadRequest> = DeserrError<DeserrJson, C>;
pub type DeserrQueryParamError<C = BadRequest> = DeserrError<DeserrQueryParam, C>;
pub type DeserrJsonError<C = deserr_codes::BadRequest> = DeserrError<DeserrJson, C>;
pub type DeserrQueryParamError<C = deserr_codes::BadRequest> = DeserrError<DeserrQueryParam, C>;
/// A request deserialization error.
///
@@ -38,7 +37,6 @@ impl<Format, C: Default + ErrorCode> DeserrError<Format, C> {
Self { msg, code, _phantom: PhantomData }
}
}
impl<Format, C: Default + ErrorCode> std::fmt::Debug for DeserrError<Format, C> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("DeserrError").field("msg", &self.msg).field("code", &self.code).finish()
@@ -51,16 +49,6 @@ impl<Format, C: Default + ErrorCode> std::fmt::Display for DeserrError<Format, C
}
}
impl<F, C: Default + ErrorCode> actix_web::ResponseError for DeserrError<F, C> {
fn status_code(&self) -> actix_web::http::StatusCode {
self.code.http()
}
fn error_response(&self) -> actix_web::HttpResponse<actix_web::body::BoxBody> {
crate::error::ResponseError::from_msg(self.msg.to_string(), self.code).error_response()
}
}
impl<Format, C: Default + ErrorCode> std::error::Error for DeserrError<Format, C> {}
impl<Format, C: Default + ErrorCode> ErrorCode for DeserrError<Format, C> {
fn error_code(&self) -> Code {
@@ -76,8 +64,8 @@ impl<Format, C1: Default + ErrorCode, C2: Default + ErrorCode>
_self_: Option<Self>,
other: DeserrError<Format, C2>,
_merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
ControlFlow::Break(DeserrError { msg: other.msg, code: other.code, _phantom: PhantomData })
) -> Result<Self, Self> {
Err(DeserrError { msg: other.msg, code: other.code, _phantom: PhantomData })
}
}
@@ -86,56 +74,17 @@ impl<Format, C: Default + ErrorCode> MergeWithError<Infallible> for DeserrError<
_self_: Option<Self>,
_other: Infallible,
_merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
) -> Result<Self, Self> {
unreachable!()
}
}
impl<C: Default + ErrorCode> DeserializeError for DeserrJsonError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
ControlFlow::Break(DeserrJsonError::new(
take_cf_content(JsonError::error(None, error, location)).to_string(),
C::default().error_code(),
))
}
}
impl<C: Default + ErrorCode> DeserializeError for DeserrQueryParamError<C> {
fn error<V: IntoValue>(
_self_: Option<Self>,
error: deserr::ErrorKind<V>,
location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
ControlFlow::Break(DeserrQueryParamError::new(
take_cf_content(QueryParamError::error(None, error, location)).to_string(),
C::default().error_code(),
))
}
}
pub fn immutable_field_error(field: &str, accepted: &[&str], code: Code) -> DeserrJsonError {
let msg = format!(
"Immutable field `{field}`: expected one of {}",
accepted
.iter()
.map(|accepted| format!("`{}`", accepted))
.collect::<Vec<String>>()
.join(", ")
);
DeserrJsonError::new(msg, code)
}
// Implement a convenience function to build a `missing_field` error
macro_rules! make_missing_field_convenience_builder {
($err_code:ident, $fn_name:ident) => {
impl DeserrJsonError<$err_code> {
pub fn $fn_name(field: &str, location: ValuePointerRef) -> Self {
let x = deserr::take_cf_content(Self::error::<Infallible>(
let x = unwrap_any(Self::error::<Infallible>(
None,
deserr::ErrorKind::MissingField { field },
location,
@@ -163,7 +112,7 @@ macro_rules! merge_with_error_impl_take_error_message {
_self_: Option<Self>,
other: $err_type,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
) -> Result<Self, Self> {
DeserrError::<Format, C>::error::<Infallible>(
None,
deserr::ErrorKind::Unexpected { msg: other.to_string() },

View File

@@ -15,9 +15,10 @@ use std::convert::Infallible;
use std::ops::Deref;
use std::str::FromStr;
use deserr::{DeserializeError, Deserr, MergeWithError, ValueKind};
use deserr::{DeserializeError, DeserializeFromValue, MergeWithError, ValueKind};
use super::{DeserrParseBoolError, DeserrParseIntError};
use crate::error::unwrap_any;
use crate::index_uid::IndexUid;
use crate::tasks::{Kind, Status};
@@ -37,7 +38,7 @@ impl<T> Deref for Param<T> {
}
}
impl<T, E> Deserr<E> for Param<T>
impl<T, E> DeserializeFromValue<E> for Param<T>
where
E: DeserializeError + MergeWithError<T::Err>,
T: FromQueryParameter,
@@ -49,9 +50,9 @@ where
match value {
deserr::Value::String(s) => match T::from_query_param(&s) {
Ok(x) => Ok(Param(x)),
Err(e) => Err(deserr::take_cf_content(E::merge(None, e, location))),
Err(e) => Err(unwrap_any(E::merge(None, e, location))),
},
_ => Err(deserr::take_cf_content(E::error(
_ => Err(unwrap_any(E::error(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,

View File

@@ -19,7 +19,7 @@ type Result<T> = std::result::Result<T, DocumentFormatError>;
pub enum PayloadType {
Ndjson,
Json,
Csv { delimiter: u8 },
Csv,
}
impl fmt::Display for PayloadType {
@@ -27,7 +27,7 @@ impl fmt::Display for PayloadType {
match self {
PayloadType::Ndjson => f.write_str("ndjson"),
PayloadType::Json => f.write_str("json"),
PayloadType::Csv { .. } => f.write_str("csv"),
PayloadType::Csv => f.write_str("csv"),
}
}
}
@@ -105,11 +105,11 @@ impl ErrorCode for DocumentFormatError {
}
/// Reads CSV from input and write an obkv batch to writer.
pub fn read_csv(file: &File, writer: impl Write + Seek, delimiter: u8) -> Result<u64> {
pub fn read_csv(file: &File, writer: impl Write + Seek) -> Result<u64> {
let mut builder = DocumentsBatchBuilder::new(writer);
let mmap = unsafe { MmapOptions::new().map(file)? };
let csv = csv::ReaderBuilder::new().delimiter(delimiter).from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv { delimiter }, e))?;
let csv = csv::Reader::from_reader(mmap.as_ref());
builder.append_csv(csv).map_err(|e| (PayloadType::Csv, e))?;
let count = builder.documents_count();
let _ = builder.into_inner().map_err(DocumentFormatError::Io)?;

View File

@@ -127,7 +127,7 @@ macro_rules! make_error_codes {
}
impl Code {
/// return the HTTP status code associated with the `Code`
pub fn http(&self) -> StatusCode {
fn http(&self) -> StatusCode {
match self {
$(
Code::$code_ident => StatusCode::$status
@@ -220,7 +220,6 @@ InvalidDocumentOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexLimit , InvalidRequest , BAD_REQUEST ;
InvalidIndexOffset , InvalidRequest , BAD_REQUEST ;
InvalidIndexPrimaryKey , InvalidRequest , BAD_REQUEST ;
InvalidIndexCsvDelimiter , InvalidRequest , BAD_REQUEST ;
InvalidIndexUid , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ;
InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ;
@@ -382,6 +381,14 @@ impl ErrorCode for io::Error {
}
}
/// Unwrap a result, either its Ok or Err value.
pub fn unwrap_any<T>(any: Result<T, T>) -> T {
match any {
Ok(any) => any,
Err(any) => any,
}
}
/// Deserialization when `deserr` cannot parse an API key date.
#[derive(Debug)]
pub struct ParseOffsetDateTimeError(pub String);

View File

@@ -2,14 +2,14 @@ use std::error::Error;
use std::fmt;
use std::str::FromStr;
use deserr::Deserr;
use deserr::DeserializeFromValue;
use crate::error::{Code, ErrorCode};
/// An index uid is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(String) = IndexUid::try_from -> IndexUidFormatError)]
#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromValue)]
#[deserr(from(String) = IndexUid::try_from -> IndexUidFormatError)]
pub struct IndexUid(String);
impl IndexUid {

View File

@@ -1,124 +0,0 @@
use std::borrow::Borrow;
use std::error::Error;
use std::fmt;
use std::ops::Deref;
use std::str::FromStr;
use deserr::Deserr;
use serde::{Deserialize, Serialize};
use crate::error::{Code, ErrorCode};
use crate::index_uid::{IndexUid, IndexUidFormatError};
/// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400
/// bytes long and optionally ending with a *.
#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)]
#[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)]
pub struct IndexUidPattern(String);
impl IndexUidPattern {
pub fn new_unchecked(s: impl AsRef<str>) -> Self {
Self(s.as_ref().to_string())
}
/// Matches any index name.
pub fn all() -> Self {
IndexUidPattern::from_str("*").unwrap()
}
/// Returns `true` if it matches any index.
pub fn matches_all(&self) -> bool {
self.0 == "*"
}
/// Returns `true` if the pattern matches a specific index name.
pub fn is_exact(&self) -> bool {
!self.0.ends_with('*')
}
/// Returns whether this index uid matches this index uid pattern.
pub fn matches(&self, uid: &IndexUid) -> bool {
self.matches_str(uid.as_str())
}
/// Returns whether this string matches this index uid pattern.
pub fn matches_str(&self, uid: &str) -> bool {
match self.0.strip_suffix('*') {
Some(prefix) => uid.starts_with(prefix),
None => self.0 == uid,
}
}
}
impl Deref for IndexUidPattern {
type Target = str;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Borrow<str> for IndexUidPattern {
fn borrow(&self) -> &str {
&self.0
}
}
impl TryFrom<String> for IndexUidPattern {
type Error = IndexUidPatternFormatError;
fn try_from(uid: String) -> Result<Self, Self::Error> {
let result = match uid.strip_suffix('*') {
Some("") => Ok(IndexUidPattern(uid)),
Some(prefix) => IndexUid::from_str(prefix).map(|_| IndexUidPattern(uid)),
None => IndexUid::try_from(uid).map(IndexUid::into_inner).map(IndexUidPattern),
};
match result {
Ok(index_uid_pattern) => Ok(index_uid_pattern),
Err(IndexUidFormatError { invalid_uid }) => {
Err(IndexUidPatternFormatError { invalid_uid })
}
}
}
}
impl FromStr for IndexUidPattern {
type Err = IndexUidPatternFormatError;
fn from_str(uid: &str) -> Result<IndexUidPattern, IndexUidPatternFormatError> {
uid.to_string().try_into()
}
}
impl From<IndexUidPattern> for String {
fn from(IndexUidPattern(uid): IndexUidPattern) -> Self {
uid
}
}
#[derive(Debug)]
pub struct IndexUidPatternFormatError {
pub invalid_uid: String,
}
impl fmt::Display for IndexUidPatternFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(
f,
"`{}` is not a valid index uid pattern. Index uid patterns \
can be an integer or a string containing only alphanumeric \
characters, hyphens (-), underscores (_), and \
optionally end with a star (*).",
self.invalid_uid,
)
}
}
impl Error for IndexUidPatternFormatError {}
impl ErrorCode for IndexUidPatternFormatError {
fn error_code(&self) -> Code {
Code::InvalidIndexUid
}
}

View File

@@ -2,7 +2,7 @@ use std::convert::Infallible;
use std::hash::Hash;
use std::str::FromStr;
use deserr::{DeserializeError, Deserr, MergeWithError, ValuePointerRef};
use deserr::{DeserializeError, DeserializeFromValue, ValuePointerRef};
use enum_iterator::Sequence;
use milli::update::Setting;
use serde::{Deserialize, Serialize};
@@ -11,44 +11,31 @@ use time::macros::{format_description, time};
use time::{Date, OffsetDateTime, PrimitiveDateTime};
use uuid::Uuid;
use crate::deserr::{immutable_field_error, DeserrError, DeserrJsonError};
use crate::deserr::error_messages::immutable_field_error;
use crate::deserr::DeserrJsonError;
use crate::error::deserr_codes::*;
use crate::error::{Code, ErrorCode, ParseOffsetDateTimeError};
use crate::index_uid_pattern::{IndexUidPattern, IndexUidPatternFormatError};
use crate::error::{unwrap_any, Code, ParseOffsetDateTimeError};
use crate::index_uid::IndexUid;
use crate::star_or::StarOr;
pub type KeyId = Uuid;
impl<C: Default + ErrorCode> MergeWithError<IndexUidPatternFormatError> for DeserrJsonError<C> {
fn merge(
_self_: Option<Self>,
other: IndexUidPatternFormatError,
merge_location: deserr::ValuePointerRef,
) -> std::ops::ControlFlow<Self, Self> {
DeserrError::error::<Infallible>(
None,
deserr::ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
#[derive(Debug, Deserr)]
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct CreateApiKey {
#[deserr(default, error = DeserrJsonError<InvalidApiKeyDescription>)]
pub description: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidApiKeyName>)]
pub name: Option<String>,
#[deserr(default = Uuid::new_v4(), error = DeserrJsonError<InvalidApiKeyUid>, try_from(&String) = Uuid::from_str -> uuid::Error)]
#[deserr(default = Uuid::new_v4(), error = DeserrJsonError<InvalidApiKeyUid>, from(&String) = Uuid::from_str -> uuid::Error)]
pub uid: KeyId,
#[deserr(error = DeserrJsonError<InvalidApiKeyActions>, missing_field_error = DeserrJsonError::missing_api_key_actions)]
pub actions: Vec<Action>,
#[deserr(error = DeserrJsonError<InvalidApiKeyIndexes>, missing_field_error = DeserrJsonError::missing_api_key_indexes)]
pub indexes: Vec<IndexUidPattern>,
#[deserr(error = DeserrJsonError<InvalidApiKeyExpiresAt>, try_from(Option<String>) = parse_expiration_date -> ParseOffsetDateTimeError, missing_field_error = DeserrJsonError::missing_api_key_expires_at)]
pub indexes: Vec<StarOr<IndexUid>>,
#[deserr(error = DeserrJsonError<InvalidApiKeyExpiresAt>, from(Option<String>) = parse_expiration_date -> ParseOffsetDateTimeError, missing_field_error = DeserrJsonError::missing_api_key_expires_at)]
pub expires_at: Option<OffsetDateTime>,
}
impl CreateApiKey {
pub fn to_key(self) -> Key {
let CreateApiKey { description, name, uid, actions, indexes, expires_at } = self;
@@ -78,7 +65,7 @@ fn deny_immutable_fields_api_key(
"expiresAt" => immutable_field_error(field, accepted, Code::ImmutableApiKeyExpiresAt),
"createdAt" => immutable_field_error(field, accepted, Code::ImmutableApiKeyCreatedAt),
"updatedAt" => immutable_field_error(field, accepted, Code::ImmutableApiKeyUpdatedAt),
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
_ => unwrap_any(DeserrJsonError::<BadRequest>::error::<Infallible>(
None,
deserr::ErrorKind::UnknownKey { key: field, accepted },
location,
@@ -86,7 +73,7 @@ fn deny_immutable_fields_api_key(
}
}
#[derive(Debug, Deserr)]
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_api_key)]
pub struct PatchApiKey {
#[deserr(default, error = DeserrJsonError<InvalidApiKeyDescription>)]
@@ -103,7 +90,7 @@ pub struct Key {
pub name: Option<String>,
pub uid: KeyId,
pub actions: Vec<Action>,
pub indexes: Vec<IndexUidPattern>,
pub indexes: Vec<StarOr<IndexUid>>,
#[serde(with = "time::serde::rfc3339::option")]
pub expires_at: Option<OffsetDateTime>,
#[serde(with = "time::serde::rfc3339")]
@@ -121,7 +108,7 @@ impl Key {
description: Some("Use it for anything that is not a search operation. Caution! Do not expose it on a public frontend".to_string()),
uid,
actions: vec![Action::All],
indexes: vec![IndexUidPattern::all()],
indexes: vec![StarOr::Star],
expires_at: None,
created_at: now,
updated_at: now,
@@ -136,7 +123,7 @@ impl Key {
description: Some("Use it to search from the frontend".to_string()),
uid,
actions: vec![Action::Search],
indexes: vec![IndexUidPattern::all()],
indexes: vec![StarOr::Star],
expires_at: None,
created_at: now,
updated_at: now,
@@ -181,7 +168,9 @@ fn parse_expiration_date(
}
}
#[derive(Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence, Deserr)]
#[derive(
Copy, Clone, Serialize, Deserialize, Debug, Eq, PartialEq, Hash, Sequence, DeserializeFromValue,
)]
#[repr(u8)]
pub enum Action {
#[serde(rename = "*")]

View File

@@ -3,7 +3,6 @@ pub mod deserr;
pub mod document_formats;
pub mod error;
pub mod index_uid;
pub mod index_uid_pattern;
pub mod keys;
pub mod settings;
pub mod star_or;

View File

@@ -3,10 +3,9 @@ use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use std::ops::ControlFlow;
use std::str::FromStr;
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
use deserr::{DeserializeError, DeserializeFromValue, ErrorKind, MergeWithError, ValuePointerRef};
use fst::IntoStreamer;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
@@ -14,6 +13,7 @@ use serde::{Deserialize, Serialize, Serializer};
use crate::deserr::DeserrJsonError;
use crate::error::deserr_codes::*;
use crate::error::unwrap_any;
/// The maximum number of results that the engine
/// will be able to return in one search call.
@@ -41,7 +41,7 @@ pub struct Checked;
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;
impl<E> Deserr<E> for Unchecked
impl<E> DeserializeFromValue<E> for Unchecked
where
E: DeserializeError,
{
@@ -59,13 +59,13 @@ fn validate_min_word_size_for_typo_setting<E: DeserializeError>(
) -> Result<MinWordSizeTyposSetting, E> {
if let (Setting::Set(one), Setting::Set(two)) = (s.one_typo, s.two_typos) {
if one > two {
return Err(deserr::take_cf_content(E::error::<Infallible>(None, ErrorKind::Unexpected { msg: format!("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {one}` and twoTypos: {two}`.") }, location)));
return Err(unwrap_any(E::error::<Infallible>(None, ErrorKind::Unexpected { msg: format!("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {one}` and twoTypos: {two}`.") }, location)));
}
}
Ok(s)
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(deny_unknown_fields, rename_all = camelCase, validate = validate_min_word_size_for_typo_setting -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct MinWordSizeTyposSetting {
@@ -77,7 +77,7 @@ pub struct MinWordSizeTyposSetting {
pub two_typos: Setting<u8>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(deny_unknown_fields, rename_all = camelCase, where_predicate = __Deserr_E: deserr::MergeWithError<DeserrJsonError<InvalidSettingsTypoTolerance>>)]
pub struct TypoSettings {
@@ -95,7 +95,7 @@ pub struct TypoSettings {
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FacetingSettings {
@@ -104,7 +104,7 @@ pub struct FacetingSettings {
pub max_values_per_facet: Setting<usize>,
}
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct PaginationSettings {
@@ -118,7 +118,7 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
_self_: Option<Self>,
other: milli::CriterionError,
merge_location: ValuePointerRef,
) -> ControlFlow<Self, Self> {
) -> Result<Self, Self> {
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
@@ -130,7 +130,7 @@ impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRa
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, DeserializeFromValue)]
#[serde(
deny_unknown_fields,
rename_all = "camelCase",
@@ -509,8 +509,8 @@ pub fn settings(
})
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromValue)]
#[deserr(from(&String) = FromStr::from_str -> CriterionError)]
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.

View File

@@ -1,13 +1,13 @@
use std::fmt;
use std::marker::PhantomData;
use std::ops::ControlFlow;
use std::str::FromStr;
use deserr::{DeserializeError, Deserr, MergeWithError, ValueKind};
use deserr::{DeserializeError, DeserializeFromValue, MergeWithError, ValueKind};
use serde::de::Visitor;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use crate::deserr::query_params::FromQueryParameter;
use crate::error::unwrap_any;
/// A type that tries to match either a star (*) or
/// any other thing that implements `FromStr`.
@@ -111,7 +111,7 @@ where
}
}
impl<T, E> Deserr<E> for StarOr<T>
impl<T, E> DeserializeFromValue<E> for StarOr<T>
where
T: FromStr,
E: DeserializeError + MergeWithError<T::Err>,
@@ -127,11 +127,11 @@ where
} else {
match T::from_str(&v) {
Ok(parsed) => Ok(StarOr::Other(parsed)),
Err(e) => Err(deserr::take_cf_content(E::merge(None, e, location))),
Err(e) => Err(unwrap_any(E::merge(None, e, location))),
}
}
}
_ => Err(deserr::take_cf_content(E::error::<V>(
_ => Err(unwrap_any(E::error::<V>(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
@@ -191,7 +191,7 @@ where
}
}
impl<T, E> Deserr<E> for OptionStarOr<T>
impl<T, E> DeserializeFromValue<E> for OptionStarOr<T>
where
E: DeserializeError + MergeWithError<T::Err>,
T: FromQueryParameter,
@@ -205,10 +205,10 @@ where
"*" => Ok(OptionStarOr::Star),
s => match T::from_query_param(s) {
Ok(x) => Ok(OptionStarOr::Other(x)),
Err(e) => Err(deserr::take_cf_content(E::merge(None, e, location))),
Err(e) => Err(unwrap_any(E::merge(None, e, location))),
},
},
_ => Err(deserr::take_cf_content(E::error::<V>(
_ => Err(unwrap_any(E::error::<V>(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,
@@ -271,7 +271,7 @@ impl<T> OptionStarOrList<T> {
}
}
impl<T, E> Deserr<E> for OptionStarOrList<T>
impl<T, E> DeserializeFromValue<E> for OptionStarOrList<T>
where
E: DeserializeError + MergeWithError<T::Err>,
T: FromQueryParameter,
@@ -299,10 +299,7 @@ where
Err(e) => {
let location =
if len_cs > 1 { location.push_index(i) } else { location };
error = match E::merge(error, e, location) {
ControlFlow::Continue(e) => Some(e),
ControlFlow::Break(e) => return Err(e),
};
error = Some(E::merge(error, e, location)?);
}
}
}
@@ -317,7 +314,7 @@ where
Ok(OptionStarOrList::List(els))
}
}
_ => Err(deserr::take_cf_content(E::error::<V>(
_ => Err(unwrap_any(E::error::<V>(
None,
deserr::ErrorKind::IncorrectValueKind {
actual: value,

View File

@@ -1,16 +1,10 @@
[package]
authors = ["Quentin de Quelen <quentin@dequelen.me>", "Clément Renault <clement@meilisearch.com>"]
description = "Meilisearch HTTP server"
edition = "2021"
license = "MIT"
name = "meilisearch"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
edition.workspace = true
license.workspace = true
default-run = "meilisearch"
version = "1.0.0"
[dependencies]
actix-cors = "0.6.3"
@@ -25,7 +19,7 @@ byte-unit = { version = "4.0.14", default-features = false, features = ["std", "
bytes = "1.2.1"
clap = { version = "4.0.9", features = ["derive", "env"] }
crossbeam-channel = "0.5.6"
deserr = "0.5.0"
deserr = "0.3.0"
dump = { path = "../dump" }
either = "1.8.0"
env_logger = "0.9.1"
@@ -96,7 +90,7 @@ yaup = "0.2.1"
[build-dependencies]
anyhow = { version = "1.0.65", optional = true }
cargo_toml = { version = "0.14.0", optional = true }
cargo_toml = { version = "0.13.0", optional = true }
hex = { version = "0.4.3", optional = true }
reqwest = { version = "0.11.12", features = ["blocking", "rustls-tls"], default-features = false, optional = true }
sha-1 = { version = "0.10.0", optional = true }
@@ -116,5 +110,5 @@ japanese = ["meilisearch-types/japanese"]
thai = ["meilisearch-types/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.6/build.zip"
sha1 = "dce0aba16bceab5549edf9f01de89858800f7422"
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.5/build.zip"
sha1 = "6fe959b78511b32e9ff857fd9fd31740633b9fce"

View File

@@ -1,12 +1,11 @@
use vergen::{vergen, Config, SemverKind};
fn main() {
// Note: any code that needs VERGEN_ environment variables should take care to define them manually in the Dockerfile and pass them
// in the corresponding GitHub workflow (publish_docker.yml).
// This is due to the Dockerfile building the binary outside of the git directory.
let mut config = Config::default();
// allow using non-annotated tags
*config.git_mut().semver_kind_mut() = SemverKind::Lightweight;
// add -dirty suffix when we're not right on the tag
*config.git_mut().semver_dirty_mut() = Some("-dirty");
if let Err(e) = vergen(config) {
println!("cargo:warning=vergen: {}", e);

View File

@@ -11,8 +11,6 @@ pub enum MeilisearchHttpError {
#[error("A Content-Type header is missing. Accepted values for the Content-Type header are: {}",
.0.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", "))]
MissingContentType(Vec<String>),
#[error("The Content-Type `{0}` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.")]
CsvDelimiterWithWrongContentType(String),
#[error(
"The Content-Type `{0}` is invalid. Accepted values for the Content-Type header are: {}",
.1.iter().map(|s| format!("`{}`", s)).collect::<Vec<_>>().join(", ")
@@ -54,7 +52,6 @@ impl ErrorCode for MeilisearchHttpError {
fn error_code(&self) -> Code {
match self {
MeilisearchHttpError::MissingContentType(_) => Code::MissingContentType,
MeilisearchHttpError::CsvDelimiterWithWrongContentType(_) => Code::InvalidContentType,
MeilisearchHttpError::MissingPayload(_) => Code::MissingPayload,
MeilisearchHttpError::InvalidContentType(_, _) => Code::InvalidContentType,
MeilisearchHttpError::DocumentNotFound(_) => Code::DocumentNotFound,

View File

@@ -199,9 +199,6 @@ pub mod policies {
token: &str,
index: Option<&str>,
) -> Option<AuthFilter> {
// A tenant token only has access to the search route which always defines an index.
let index = index?;
// Only search action can be accessed by a tenant token.
if A != actions::SEARCH {
return None;
@@ -209,7 +206,7 @@ pub mod policies {
let uid = extract_key_id(token)?;
// check if parent key is authorized to do the action.
if auth.is_key_authorized(uid, Action::Search, Some(index)).ok()? {
if auth.is_key_authorized(uid, Action::Search, index).ok()? {
// Check if tenant token is valid.
let key = auth.generate_key(uid)?;
let data = decode::<Claims>(
@@ -220,8 +217,10 @@ pub mod policies {
.ok()?;
// Check index access if an index restriction is provided.
if !data.claims.search_rules.is_index_authorized(index) {
return None;
if let Some(index) = index {
if !data.claims.search_rules.is_index_authorized(index) {
return None;
}
}
// Check if token is expired.
@@ -231,10 +230,7 @@ pub mod policies {
}
}
return match auth.get_key_filters(uid, Some(data.claims.search_rules)) {
Ok(auth) if auth.search_rules.is_index_authorized(index) => Some(auth),
_ => None,
};
return auth.get_key_filters(uid, Some(data.claims.search_rules)).ok();
}
None

View File

@@ -0,0 +1,78 @@
use std::fmt::Debug;
use std::future::Future;
use std::marker::PhantomData;
use std::pin::Pin;
use std::task::{Context, Poll};
use actix_web::dev::Payload;
use actix_web::web::Json;
use actix_web::{FromRequest, HttpRequest};
use deserr::{DeserializeError, DeserializeFromValue};
use futures::ready;
use meilisearch_types::error::{ErrorCode, ResponseError};
/// Extractor for typed data from Json request payloads
/// deserialised by deserr.
///
/// # Extractor
/// To extract typed data from a request body, the inner type `T` must implement the
/// [`deserr::DeserializeFromValue<E>`] trait. The inner type `E` must implement the
/// [`ErrorCode`](meilisearch_types::error::ErrorCode) trait.
#[derive(Debug)]
pub struct ValidatedJson<T, E>(pub T, PhantomData<*const E>);
impl<T, E> ValidatedJson<T, E> {
pub fn new(data: T) -> Self {
ValidatedJson(data, PhantomData)
}
pub fn into_inner(self) -> T {
self.0
}
}
impl<T, E> FromRequest for ValidatedJson<T, E>
where
E: DeserializeError + ErrorCode + std::error::Error + 'static,
T: DeserializeFromValue<E>,
{
type Error = actix_web::Error;
type Future = ValidatedJsonExtractFut<T, E>;
#[inline]
fn from_request(req: &HttpRequest, payload: &mut Payload) -> Self::Future {
ValidatedJsonExtractFut {
fut: Json::<serde_json::Value>::from_request(req, payload),
_phantom: PhantomData,
}
}
}
pub struct ValidatedJsonExtractFut<T, E> {
fut: <Json<serde_json::Value> as FromRequest>::Future,
_phantom: PhantomData<*const (T, E)>,
}
impl<T, E> Future for ValidatedJsonExtractFut<T, E>
where
T: DeserializeFromValue<E>,
E: DeserializeError + ErrorCode + std::error::Error + 'static,
{
type Output = Result<ValidatedJson<T, E>, actix_web::Error>;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
let ValidatedJsonExtractFut { fut, .. } = self.get_mut();
let fut = Pin::new(fut);
let res = ready!(fut.poll(cx));
let res = match res {
Err(err) => Err(err),
Ok(data) => match deserr::deserialize::<_, _, E>(data.into_inner()) {
Ok(data) => Ok(ValidatedJson::new(data)),
Err(e) => Err(ResponseError::from(e).into()),
},
};
Poll::Ready(res)
}
}
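A minimal usage sketch of this extractor (the handler below is hypothetical; the real call site in this diff is the keys route further down, which takes `ValidatedJson<CreateApiKey, DeserrJsonError>`):
// Hypothetical handler: only `ValidatedJson`, `DeserrJsonError`, `CreateApiKey` and
// `ResponseError` come from this changeset, the rest is illustrative.
async fn create_api_key_example(
    body: ValidatedJson<CreateApiKey, DeserrJsonError>,
) -> Result<HttpResponse, ResponseError> {
    // The payload has already been deserialized and validated by deserr here;
    // a malformed body never reaches the handler and is returned as a ResponseError.
    let key: CreateApiKey = body.into_inner();
    // ... persist `key` with the auth controller ...
    Ok(HttpResponse::Created().finish())
}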

View File

@@ -1,4 +1,6 @@
pub mod payload;
#[macro_use]
pub mod authentication;
pub mod json;
pub mod query_parameters;
pub mod sequential_extractor;

View File

@@ -0,0 +1,70 @@
//! A module to parse query parameter with deserr
use std::marker::PhantomData;
use std::{fmt, ops};
use actix_http::Payload;
use actix_utils::future::{err, ok, Ready};
use actix_web::{FromRequest, HttpRequest};
use deserr::{DeserializeError, DeserializeFromValue};
use meilisearch_types::error::{Code, ErrorCode, ResponseError};
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct QueryParameter<T, E>(pub T, PhantomData<*const E>);
impl<T, E> QueryParameter<T, E> {
/// Unwrap into inner `T` value.
pub fn into_inner(self) -> T {
self.0
}
}
impl<T, E> QueryParameter<T, E>
where
T: DeserializeFromValue<E>,
E: DeserializeError + ErrorCode + std::error::Error + 'static,
{
pub fn from_query(query_str: &str) -> Result<Self, actix_web::Error> {
let value = serde_urlencoded::from_str::<serde_json::Value>(query_str)
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))?;
match deserr::deserialize::<_, _, E>(value) {
Ok(data) => Ok(QueryParameter(data, PhantomData)),
Err(e) => Err(ResponseError::from(e).into()),
}
}
}
impl<T, E> ops::Deref for QueryParameter<T, E> {
type Target = T;
fn deref(&self) -> &T {
&self.0
}
}
impl<T, E> ops::DerefMut for QueryParameter<T, E> {
fn deref_mut(&mut self) -> &mut T {
&mut self.0
}
}
impl<T: fmt::Display, E> fmt::Display for QueryParameter<T, E> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
}
}
impl<T, E> FromRequest for QueryParameter<T, E>
where
T: DeserializeFromValue<E>,
E: DeserializeError + ErrorCode + std::error::Error + 'static,
{
type Error = actix_web::Error;
type Future = Ready<Result<Self, actix_web::Error>>;
#[inline]
fn from_request(req: &HttpRequest, _: &mut Payload) -> Self::Future {
QueryParameter::from_query(req.query_string()).map(ok).unwrap_or_else(err)
}
}
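As with the JSON extractor, a small hedged sketch of driving `from_query` directly; the `Paging` struct is invented for illustration, while real call sites such as `list_api_keys` appear later in this diff.
use deserr::DeserializeFromValue;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::deserr_codes::InvalidApiKeyLimit;
use crate::extractors::query_parameters::QueryParameter;
// Hypothetical query-parameter struct; `Param<usize>` parses "limit=20" into 20.
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
struct Paging {
    #[deserr(default, error = DeserrQueryParamError<InvalidApiKeyLimit>)]
    limit: Param<usize>,
}
fn parse_paging(query_string: &str) -> Result<Paging, actix_web::Error> {
    // urldecode into a `serde_json::Value`, then let deserr validate it.
    QueryParameter::<Paging, DeserrQueryParamError>::from_query(query_string)
        .map(QueryParameter::into_inner)
}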

View File

@@ -435,13 +435,18 @@ pub fn configure_metrics_route(config: &mut web::ServiceConfig, enable_metrics_r
/// Returns `Some(prototype_name)` if the following conditions are met on this value:
///
/// 1. starts with `prototype-`,
/// 2. ends with `-<some_number>`,
/// 3. does not end with `<some_number>-<some_number>`.
/// 2. does not end with `-dirty`,
/// 3. ends with `-<some_number>`,
/// 4. does not end with `<some_number>-<some_number>`.
///
/// Otherwise, returns `None`.
pub fn prototype_name() -> Option<&'static str> {
let prototype: &'static str = option_env!("VERGEN_GIT_SEMVER_LIGHTWEIGHT")?;
if prototype.ends_with("-dirty") {
return None;
}
if !prototype.starts_with("prototype-") {
return None;
}
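The hunk is truncated here. As a minimal sketch of the four documented conditions (not the PR's exact code, which continues past this hunk), the numeric-suffix rules could be checked with `rsplit_once`:
// Illustrative helper only -- the real `prototype_name()` may be written differently.
fn looks_like_prototype(name: &str) -> bool {
    // 1. starts with `prototype-`, 2. does not end with `-dirty`
    if name.ends_with("-dirty") || !name.starts_with("prototype-") {
        return false;
    }
    // 3. ends with `-<some_number>`
    let (head, last) = match name.rsplit_once('-') {
        Some(parts) => parts,
        None => return false,
    };
    if last.parse::<u64>().is_err() {
        return false;
    }
    // 4. but does not end with `<some_number>-<some_number>`
    match head.rsplit_once('-') {
        Some((_, previous)) => previous.parse::<u64>().is_err(),
        None => true,
    }
}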

View File

@@ -1,8 +1,7 @@
use std::str;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use deserr::DeserializeFromValue;
use meilisearch_auth::error::AuthControllerError;
use meilisearch_auth::AuthController;
use meilisearch_types::deserr::query_params::Param;
@@ -17,6 +16,8 @@ use uuid::Uuid;
use super::PAGINATION_DEFAULT_LIMIT;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::Pagination;
@@ -36,7 +37,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
pub async fn create_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_CREATE }>, AuthController>,
body: AwebJson<CreateApiKey, DeserrJsonError>,
body: ValidatedJson<CreateApiKey, DeserrJsonError>,
_req: HttpRequest,
) -> Result<HttpResponse, ResponseError> {
let v = body.into_inner();
@@ -50,7 +51,7 @@ pub async fn create_api_key(
Ok(HttpResponse::Created().json(res))
}
#[derive(Deserr, Debug, Clone, Copy)]
#[derive(DeserializeFromValue, Debug, Clone, Copy)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct ListApiKeys {
#[deserr(default, error = DeserrQueryParamError<InvalidApiKeyOffset>)]
@@ -58,7 +59,6 @@ pub struct ListApiKeys {
#[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidApiKeyLimit>)]
pub limit: Param<usize>,
}
impl ListApiKeys {
fn as_pagination(self) -> Pagination {
Pagination { offset: self.offset.0, limit: self.limit.0 }
@@ -67,7 +67,7 @@ impl ListApiKeys {
pub async fn list_api_keys(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_GET }>, AuthController>,
list_api_keys: AwebQueryParameter<ListApiKeys, DeserrQueryParamError>,
list_api_keys: QueryParameter<ListApiKeys, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let paginate = list_api_keys.into_inner().as_pagination();
let page_view = tokio::task::spawn_blocking(move || -> Result<_, AuthControllerError> {
@@ -104,7 +104,7 @@ pub async fn get_api_key(
pub async fn patch_api_key(
auth_controller: GuardedData<ActionPolicy<{ actions::KEYS_UPDATE }>, AuthController>,
body: AwebJson<PatchApiKey, DeserrJsonError>,
body: ValidatedJson<PatchApiKey, DeserrJsonError>,
path: web::Path<AuthParam>,
) -> Result<HttpResponse, ResponseError> {
let key = path.into_inner().key;

View File

@@ -4,16 +4,15 @@ use actix_web::http::header::CONTENT_TYPE;
use actix_web::web::Data;
use actix_web::{web, HttpMessage, HttpRequest, HttpResponse};
use bstr::ByteSlice;
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use deserr::DeserializeFromValue;
use futures::StreamExt;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::error::ResponseError;
use meilisearch_types::heed::RoTxn;
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@@ -34,6 +33,7 @@ use crate::error::PayloadError::ReceivePayload;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::payload::Payload;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
@@ -67,7 +67,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(
web::resource("")
.route(web::get().to(SeqHandler(get_all_documents)))
.route(web::post().to(SeqHandler(replace_documents)))
.route(web::post().to(SeqHandler(add_documents)))
.route(web::put().to(SeqHandler(update_documents)))
.route(web::delete().to(SeqHandler(clear_all_documents))),
)
@@ -80,7 +80,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
#[derive(Debug, Deserr)]
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct GetDocument {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
@@ -90,7 +90,7 @@ pub struct GetDocument {
pub async fn get_document(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
document_param: web::Path<DocumentParam>,
params: AwebQueryParameter<GetDocument, DeserrQueryParamError>,
params: QueryParameter<GetDocument, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let DocumentParam { index_uid, document_id } = document_param.into_inner();
let index_uid = IndexUid::try_from(index_uid)?;
@@ -125,7 +125,7 @@ pub async fn delete_document(
Ok(HttpResponse::Accepted().json(task))
}
#[derive(Debug, Deserr)]
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct BrowseQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
@@ -139,7 +139,7 @@ pub struct BrowseQuery {
pub async fn get_all_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
params: QueryParameter<BrowseQuery, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
@@ -155,32 +155,17 @@ pub async fn get_all_documents(
Ok(HttpResponse::Ok().json(ret))
}
#[derive(Deserialize, Debug, Deserr)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
#[derive(Deserialize, Debug, DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct UpdateDocumentsQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexPrimaryKey>)]
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
pub primary_key: Option<String>,
#[deserr(default, try_from(char) = from_char_csv_delimiter -> DeserrQueryParamError<InvalidIndexCsvDelimiter>, error = DeserrQueryParamError<InvalidIndexCsvDelimiter>)]
pub csv_delimiter: Option<u8>,
}
fn from_char_csv_delimiter(
c: char,
) -> Result<Option<u8>, DeserrQueryParamError<InvalidIndexCsvDelimiter>> {
if c.is_ascii() {
Ok(Some(c as u8))
} else {
Err(DeserrQueryParamError::new(
format!("csv delimiter must be an ascii character. Found: `{}`", c),
Code::InvalidIndexCsvDelimiter,
))
}
}
pub async fn replace_documents(
pub async fn add_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
params: QueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@@ -198,7 +183,6 @@ pub async fn replace_documents(
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
body,
IndexDocumentsMethod::ReplaceDocuments,
allow_index_creation,
@@ -211,7 +195,7 @@ pub async fn replace_documents(
pub async fn update_documents(
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<UpdateDocumentsQuery, DeserrQueryParamError>,
params: QueryParameter<UpdateDocumentsQuery, DeserrJsonError>,
body: Payload,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
@@ -219,7 +203,6 @@ pub async fn update_documents(
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
debug!("called with params: {:?}", params);
let params = params.into_inner();
analytics.update_documents(&params, index_scheduler.index(&index_uid).is_err(), &req);
@@ -228,8 +211,7 @@ pub async fn update_documents(
extract_mime_type(&req)?,
index_scheduler,
index_uid,
params.primary_key,
params.csv_delimiter,
params.into_inner().primary_key,
body,
IndexDocumentsMethod::UpdateDocuments,
allow_index_creation,
@@ -239,43 +221,26 @@ pub async fn update_documents(
Ok(HttpResponse::Accepted().json(task))
}
#[allow(clippy::too_many_arguments)]
async fn document_addition(
mime_type: Option<Mime>,
index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_ADD }>, Data<IndexScheduler>>,
index_uid: IndexUid,
primary_key: Option<String>,
csv_delimiter: Option<u8>,
mut body: Payload,
method: IndexDocumentsMethod,
allow_index_creation: bool,
) -> Result<SummarizedTaskView, MeilisearchHttpError> {
let format = match (
mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())),
csv_delimiter,
) {
(Some(("application", "json")), None) => PayloadType::Json,
(Some(("application", "x-ndjson")), None) => PayloadType::Ndjson,
(Some(("text", "csv")), None) => PayloadType::Csv { delimiter: b',' },
(Some(("text", "csv")), Some(delimiter)) => PayloadType::Csv { delimiter },
(Some(("application", "json")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/json",
)))
}
(Some(("application", "x-ndjson")), Some(_)) => {
return Err(MeilisearchHttpError::CsvDelimiterWithWrongContentType(String::from(
"application/x-ndjson",
)))
}
(Some((type_, subtype)), _) => {
let format = match mime_type.as_ref().map(|m| (m.type_().as_str(), m.subtype().as_str())) {
Some(("application", "json")) => PayloadType::Json,
Some(("application", "x-ndjson")) => PayloadType::Ndjson,
Some(("text", "csv")) => PayloadType::Csv,
Some((type_, subtype)) => {
return Err(MeilisearchHttpError::InvalidContentType(
format!("{}/{}", type_, subtype),
ACCEPTED_CONTENT_TYPE.clone(),
))
}
(None, _) => {
None => {
return Err(MeilisearchHttpError::MissingContentType(ACCEPTED_CONTENT_TYPE.clone()))
}
};
@@ -320,9 +285,7 @@ async fn document_addition(
let documents_count = tokio::task::spawn_blocking(move || {
let documents_count = match format {
PayloadType::Json => read_json(&read_file, update_file.as_file_mut())?,
PayloadType::Csv { delimiter } => {
read_csv(&read_file, update_file.as_file_mut(), delimiter)?
}
PayloadType::Csv => read_csv(&read_file, update_file.as_file_mut())?,
PayloadType::Ndjson => read_ndjson(&read_file, update_file.as_file_mut())?,
};
// we NEED to persist the file here because we moved the `update_file` into another task.

View File

@@ -2,14 +2,14 @@ use std::convert::Infallible;
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use deserr::{DeserializeError, DeserializeFromValue, ValuePointerRef};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::error_messages::immutable_field_error;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
use meilisearch_types::error::{Code, ResponseError};
use meilisearch_types::error::{unwrap_any, Code, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::milli::{self, FieldDistribution, Index};
use meilisearch_types::tasks::KindWithContent;
@@ -21,6 +21,8 @@ use super::{Pagination, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::json::ValidatedJson;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
pub mod documents;
@@ -71,7 +73,7 @@ impl IndexView {
}
}
#[derive(Deserr, Debug, Clone, Copy)]
#[derive(DeserializeFromValue, Debug, Clone, Copy)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct ListIndexes {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexOffset>)]
@@ -87,7 +89,7 @@ impl ListIndexes {
pub async fn list_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_GET }>, Data<IndexScheduler>>,
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
paginate: QueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let search_rules = &index_scheduler.filters().search_rules;
let indexes: Vec<_> = index_scheduler.indexes()?;
@@ -103,7 +105,7 @@ pub async fn list_indexes(
Ok(HttpResponse::Ok().json(ret))
}
#[derive(Deserr, Debug)]
#[derive(DeserializeFromValue, Debug)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct IndexCreateRequest {
#[deserr(error = DeserrJsonError<InvalidIndexUid>, missing_field_error = DeserrJsonError::missing_index_uid)]
@@ -114,7 +116,7 @@ pub struct IndexCreateRequest {
pub async fn create_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_CREATE }>, Data<IndexScheduler>>,
body: AwebJson<IndexCreateRequest, DeserrJsonError>,
body: ValidatedJson<IndexCreateRequest, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@@ -147,7 +149,7 @@ fn deny_immutable_fields_index(
"uid" => immutable_field_error(field, accepted, Code::ImmutableIndexUid),
"createdAt" => immutable_field_error(field, accepted, Code::ImmutableIndexCreatedAt),
"updatedAt" => immutable_field_error(field, accepted, Code::ImmutableIndexUpdatedAt),
_ => deserr::take_cf_content(DeserrJsonError::<BadRequest>::error::<Infallible>(
_ => unwrap_any(DeserrJsonError::<BadRequest>::error::<Infallible>(
None,
deserr::ErrorKind::UnknownKey { key: field, accepted },
location,
@@ -155,7 +157,7 @@ fn deny_immutable_fields_index(
}
}
#[derive(Deserr, Debug)]
#[derive(DeserializeFromValue, Debug)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields = deny_immutable_fields_index)]
pub struct UpdateIndexRequest {
#[deserr(default, error = DeserrJsonError<InvalidIndexPrimaryKey>)]
@@ -179,7 +181,7 @@ pub async fn get_index(
pub async fn update_index(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<UpdateIndexRequest, DeserrJsonError>,
body: ValidatedJson<UpdateIndexRequest, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@@ -1,6 +1,5 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_auth::IndexSearchRules;
@@ -15,6 +14,8 @@ use serde_json::Value;
use crate::analytics::{Analytics, SearchAggregator};
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
perform_search, MatchingStrategy, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
@@ -30,7 +31,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
);
}
#[derive(Debug, deserr::Deserr)]
#[derive(Debug, deserr::DeserializeFromValue)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQueryGet {
#[deserr(default, error = DeserrQueryParamError<InvalidSearchQ>)]
@@ -149,7 +150,7 @@ fn fix_sort_query_parameters(sort_query: &str) -> Vec<String> {
pub async fn search_with_url_query(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebQueryParameter<SearchQueryGet, DeserrQueryParamError>,
params: QueryParameter<SearchQueryGet, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@@ -183,7 +184,7 @@ pub async fn search_with_url_query(
pub async fn search_with_post(
index_scheduler: GuardedData<ActionPolicy<{ actions::SEARCH }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
params: AwebJson<SearchQuery, DeserrJsonError>,
params: ValidatedJson<SearchQuery, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@@ -1,6 +1,5 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use index_scheduler::IndexScheduler;
use log::debug;
use meilisearch_types::deserr::DeserrJsonError;
@@ -13,6 +12,7 @@ use serde_json::json;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::json::ValidatedJson;
use crate::routes::SummarizedTaskView;
#[macro_export]
@@ -68,7 +68,7 @@ macro_rules! make_setting_route {
Data<IndexScheduler>,
>,
index_uid: actix_web::web::Path<String>,
body: deserr::actix_web::AwebJson<Option<$type>, $err_ty>,
body: $crate::routes::indexes::ValidatedJson<Option<$type>, $err_ty>,
req: HttpRequest,
$analytics_var: web::Data<dyn Analytics>,
) -> std::result::Result<HttpResponse, ResponseError> {
@@ -468,7 +468,7 @@ generate_configure!(
pub async fn update_all(
index_scheduler: GuardedData<ActionPolicy<{ actions::SETTINGS_UPDATE }>, Data<IndexScheduler>>,
index_uid: web::Path<String>,
body: AwebJson<Settings<Unchecked>, DeserrJsonError>,
body: ValidatedJson<Settings<Unchecked>, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@@ -17,8 +17,6 @@ use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
const PAGINATION_DEFAULT_LIMIT: usize = 20;
mod api_key;
mod dump;
pub mod indexes;
@@ -36,6 +34,8 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
.service(web::scope("/swap-indexes").configure(swap_indexes::configure));
}
const PAGINATION_DEFAULT_LIMIT: usize = 20;
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct SummarizedTaskView {
@@ -59,7 +59,6 @@ impl From<Task> for SummarizedTaskView {
}
}
}
pub struct Pagination {
pub offset: usize,
pub limit: usize,

View File

@@ -1,7 +1,6 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use deserr::DeserializeFromValue;
use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::InvalidSwapIndexes;
@@ -15,13 +14,14 @@ use crate::analytics::Analytics;
use crate::error::MeilisearchHttpError;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::{AuthenticationError, GuardedData};
use crate::extractors::json::ValidatedJson;
use crate::extractors::sequential_extractor::SeqHandler;
pub fn configure(cfg: &mut web::ServiceConfig) {
cfg.service(web::resource("").route(web::post().to(SeqHandler(swap_indexes))));
}
#[derive(Deserr, Debug, Clone, PartialEq, Eq)]
#[derive(DeserializeFromValue, Debug, Clone, PartialEq, Eq)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SwapIndexesPayload {
#[deserr(error = DeserrJsonError<InvalidSwapIndexes>, missing_field_error = DeserrJsonError::missing_swap_indexes)]
@@ -30,7 +30,7 @@ pub struct SwapIndexesPayload {
pub async fn swap_indexes(
index_scheduler: GuardedData<ActionPolicy<{ actions::INDEXES_SWAP }>, Data<IndexScheduler>>,
params: AwebJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
params: ValidatedJson<Vec<SwapIndexesPayload>, DeserrJsonError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {

View File

@@ -1,7 +1,6 @@
use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::AwebQueryParameter;
use deserr::Deserr;
use deserr::DeserializeFromValue;
use index_scheduler::{IndexScheduler, Query, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::DeserrQueryParamError;
@@ -24,6 +23,7 @@ use super::SummarizedTaskView;
use crate::analytics::Analytics;
use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::extractors::query_parameters::QueryParameter;
use crate::extractors::sequential_extractor::SeqHandler;
const DEFAULT_LIMIT: u32 = 20;
@@ -162,7 +162,7 @@ impl From<Details> for DetailsView {
}
}
#[derive(Debug, Deserr)]
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TasksFilterQuery {
#[deserr(default = Param(DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidTaskLimit>)]
@@ -181,20 +181,19 @@ pub struct TasksFilterQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
pub index_uids: OptionStarOrList<IndexUid>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
impl TasksFilterQuery {
fn into_query(self) -> Query {
Query {
@@ -236,7 +235,7 @@ impl TaskDeletionOrCancelationQuery {
}
}
#[derive(Debug, Deserr)]
#[derive(Debug, DeserializeFromValue)]
#[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
pub struct TaskDeletionOrCancelationQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
@@ -250,20 +249,19 @@ pub struct TaskDeletionOrCancelationQuery {
#[deserr(default, error = DeserrQueryParamError<InvalidIndexUid>)]
pub index_uids: OptionStarOrList<IndexUid>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeEnqueuedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_enqueued_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterStartedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeStartedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_started_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskAfterFinishedAt>, from(OptionStarOr<String>) = deserialize_date_after -> InvalidTaskDateError)]
pub after_finished_at: OptionStarOr<OffsetDateTime>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, try_from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
#[deserr(default, error = DeserrQueryParamError<InvalidTaskBeforeFinishedAt>, from(OptionStarOr<String>) = deserialize_date_before -> InvalidTaskDateError)]
pub before_finished_at: OptionStarOr<OffsetDateTime>,
}
impl TaskDeletionOrCancelationQuery {
fn into_query(self) -> Query {
Query {
@@ -286,7 +284,7 @@ impl TaskDeletionOrCancelationQuery {
async fn cancel_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_CANCEL }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
params: QueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@@ -332,7 +330,7 @@ async fn cancel_tasks(
async fn delete_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_DELETE }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
params: QueryParameter<TaskDeletionOrCancelationQuery, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@@ -385,7 +383,7 @@ pub struct AllTasks {
async fn get_tasks(
index_scheduler: GuardedData<ActionPolicy<{ actions::TASKS_GET }>, Data<IndexScheduler>>,
params: AwebQueryParameter<TasksFilterQuery, DeserrQueryParamError>,
params: QueryParameter<TasksFilterQuery, DeserrQueryParamError>,
req: HttpRequest,
analytics: web::Data<dyn Analytics>,
) -> Result<HttpResponse, ResponseError> {
@@ -500,7 +498,7 @@ pub fn deserialize_date_before(
#[cfg(test)]
mod tests {
use deserr::Deserr;
use deserr::DeserializeFromValue;
use meili_snap::snapshot;
use meilisearch_types::deserr::DeserrQueryParamError;
use meilisearch_types::error::{Code, ResponseError};
@@ -509,7 +507,7 @@ mod tests {
fn deserr_query_params<T>(j: &str) -> Result<T, ResponseError>
where
T: Deserr<DeserrQueryParamError>,
T: DeserializeFromValue<DeserrQueryParamError>,
{
let value = serde_urlencoded::from_str::<serde_json::Value>(j)
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::BadRequest))?;

View File

@@ -3,7 +3,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
use std::str::FromStr;
use std::time::Instant;
use deserr::Deserr;
use deserr::DeserializeFromValue;
use either::Either;
use meilisearch_types::deserr::DeserrJsonError;
use meilisearch_types::error::deserr_codes::*;
@@ -29,7 +29,7 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
#[derive(Debug, Clone, Default, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, Default, PartialEq, Eq, DeserializeFromValue)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct SearchQuery {
#[deserr(default, error = DeserrJsonError<InvalidSearchQ>)]
@@ -74,7 +74,7 @@ impl SearchQuery {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[derive(Debug, Clone, PartialEq, Eq, DeserializeFromValue)]
#[deserr(rename_all = camelCase)]
pub enum MatchingStrategy {
/// Remove query words from last to first
@@ -108,7 +108,7 @@ pub struct SearchHit {
pub matches_position: Option<MatchesPosition>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
#[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")]
pub struct SearchResult {
pub hits: Vec<SearchHit>,
@@ -118,6 +118,8 @@ pub struct SearchResult {
pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
}
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
@@ -129,6 +131,12 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
}
#[derive(Serialize, Debug, Clone, PartialEq)]
pub struct FacetStats {
pub min: f64,
pub max: f64,
}
pub fn perform_search(
index: &Index,
query: SearchQuery,
@@ -300,7 +308,7 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
};
let facet_distribution = match query.facets {
let (facet_distribution, facet_stats) = match query.facets {
Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn);
@@ -314,18 +322,23 @@ pub fn perform_search(
facet_distribution.facets(fields);
}
let distribution = facet_distribution.candidates(candidates).execute()?;
Some(distribution)
let stats = facet_distribution.compute_stats()?;
(Some(distribution), Some(stats))
}
None => None,
None => (None, None),
};
let facet_stats = facet_stats.map(|stats| {
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
});
let result = SearchResult {
hits: documents,
hits_info,
query: query.q.clone().unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution,
facet_stats,
};
Ok(result)
}
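To make the new field concrete, a hedged example of how `FacetStats` serializes; the facet name and bounds are made up, only the `min`/`max` shape comes from the struct above.
use std::collections::BTreeMap;
#[test]
fn facet_stats_serializes_as_min_max() {
    // Illustrative values: a numeric `price` facet whose values span 10 to 20.
    let stats: BTreeMap<String, FacetStats> =
        BTreeMap::from([("price".to_string(), FacetStats { min: 10.0, max: 20.0 })]);
    let value = serde_json::to_value(&stats).unwrap();
    assert_eq!(value, serde_json::json!({ "price": { "min": 10.0, "max": 20.0 } }));
}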

View File

@@ -377,7 +377,7 @@ async fn error_add_api_key_invalid_index_uids() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response, { ".createdAt" => "[ignored]", ".updatedAt" => "[ignored]" }), @r###"
{
"message": "Invalid value at `.indexes[0]`: `invalid index # / \\name with spaces` is not a valid index uid pattern. Index uid patterns can be an integer or a string containing only alphanumeric characters, hyphens (-), underscores (_), and optionally end with a star (*).",
"message": "Invalid value at `.indexes[0]`: `invalid index # / \\name with spaces` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"code": "invalid_api_key_indexes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes"

View File

@@ -77,14 +77,12 @@ static INVALID_RESPONSE: Lazy<Value> = Lazy::new(|| {
})
});
const MASTER_KEY: &str = "MASTER_KEY";
#[actix_rt::test]
async fn error_access_expired_key() {
use std::{thread, time};
let mut server = Server::new_auth().await;
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["products"],
@@ -113,7 +111,7 @@ async fn error_access_expired_key() {
#[actix_rt::test]
async fn error_access_unauthorized_index() {
let mut server = Server::new_auth().await;
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["sales"],
@@ -146,7 +144,7 @@ async fn error_access_unauthorized_action() {
for ((method, route), action) in AUTHORIZATIONS.iter() {
// create a new API key allowing only the needed action.
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["products"],
@@ -170,7 +168,7 @@ async fn error_access_unauthorized_action() {
#[actix_rt::test]
async fn access_authorized_master_key() {
let mut server = Server::new_auth().await;
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
// master key must have access to all routes.
for ((method, route), _) in AUTHORIZATIONS.iter() {
@@ -187,7 +185,7 @@ async fn access_authorized_restricted_index() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key allowing only the needed action.
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["products"],
@@ -224,7 +222,7 @@ async fn access_authorized_no_index_restriction() {
for ((method, route), actions) in AUTHORIZATIONS.iter() {
for action in actions {
// create a new API key allowing only the needed action.
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
let content = json!({
"indexes": ["*"],
@@ -257,7 +255,7 @@ async fn access_authorized_no_index_restriction() {
#[actix_rt::test]
async fn access_authorized_stats_restricted_index() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
server.use_admin_key("MASTER_KEY").await;
// create index `test`
let index = server.index("test");
@@ -297,7 +295,7 @@ async fn access_authorized_stats_restricted_index() {
#[actix_rt::test]
async fn access_authorized_stats_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
server.use_admin_key("MASTER_KEY").await;
// create index `test`
let index = server.index("test");
@@ -337,7 +335,7 @@ async fn access_authorized_stats_no_index_restriction() {
#[actix_rt::test]
async fn list_authorized_indexes_restricted_index() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
server.use_admin_key("MASTER_KEY").await;
// create index `test`
let index = server.index("test");
@@ -378,7 +376,7 @@ async fn list_authorized_indexes_restricted_index() {
#[actix_rt::test]
async fn list_authorized_indexes_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
server.use_admin_key("MASTER_KEY").await;
// create index `test`
let index = server.index("test");
@@ -416,194 +414,10 @@ async fn list_authorized_indexes_no_index_restriction() {
assert!(response.iter().any(|index| index["uid"] == "test"));
}
#[actix_rt::test]
async fn access_authorized_index_patterns() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
// create products_1 index
let index_1 = server.index("products_1");
let (response, code) = index_1.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create products index
let index_ = server.index("products");
let (response, code) = index_.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create a key with all document access on indexes matching the products_* pattern.
let content = json!({
"indexes": ["products_*"],
"actions": ["documents.*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
// Register the key
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
// use created key.
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
// refer to products_1 and products with modified api key.
let index_1 = server.index("products_1");
let index_ = server.index("products");
// try to create an index via the add documents route
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
// Adding document to products_1 index. Should succeed with 202
let (response, code) = index_1.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
// Adding document to products index. Should Fail with 403 -- invalid_api_key
let (response, code) = index_.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
let index_1 = server.index("products_1");
index_1.wait_task(task_id).await;
let (response, code) = index_1.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
}
#[actix_rt::test]
async fn raise_error_non_authorized_index_patterns() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
// create products_1 index
let product_1_index = server.index("products_1");
let (response, code) = product_1_index.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create products_2 index
let product_2_index = server.index("products_2");
let (response, code) = product_2_index.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create test index
let test_index = server.index("test");
let (response, code) = test_index.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// create a key with all document access on indexes matching the products_* pattern.
let content = json!({
"indexes": ["products_*"],
"actions": ["documents.*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
// Register the key
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
// use created key.
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
// refer to products_1 and products_2 with modified api key.
let product_1_index = server.index("products_1");
let product_2_index = server.index("products_2");
// refer to test index
let test_index = server.index("test");
// try to create an index via the add documents route
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
// Adding document to products_1 index. Should succeed with 202
let (response, code) = product_1_index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task1_id = response["taskUid"].as_u64().unwrap();
// Adding document to products_2 index. Should succeed with 202
let (response, code) = product_2_index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task2_id = response["taskUid"].as_u64().unwrap();
// Adding document to test index. Should Fail with 403 -- invalid_api_key
let (response, code) = test_index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
server.use_api_key(MASTER_KEY);
// refer to products_1 with modified api key.
let product_1_index = server.index("products_1");
// refer to products_2 with modified api key.
let product_2_index = server.index("products_2");
product_1_index.wait_task(task1_id).await;
product_2_index.wait_task(task2_id).await;
let (response, code) = product_1_index.get_task(task1_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
let (response, code) = product_1_index.get_task(task2_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
}
#[actix_rt::test]
async fn pattern_indexes() {
// Create server with master key
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
// index.* constraints on products_* index pattern
let content = json!({
"indexes": ["products_*"],
"actions": ["indexes.*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::hours(1)).format(&Rfc3339).unwrap(),
});
// Generate and use the api key
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
let key = response["key"].as_str().expect("Key is not string");
server.use_api_key(key);
// Create Index products_1 using generated api key
let products_1 = server.index("products_1");
let (response, code) = products_1.create(Some("id")).await;
assert_eq!(202, code, "{:?}", &response);
// Fail to create products_* using generated api key
let products_1 = server.index("products_*");
let (response, code) = products_1.create(Some("id")).await;
assert_eq!(400, code, "{:?}", &response);
// Fail to create test_1 using generated api key
let products_1 = server.index("test_1");
let (response, code) = products_1.create(Some("id")).await;
assert_eq!(403, code, "{:?}", &response);
}
#[actix_rt::test]
async fn list_authorized_tasks_restricted_index() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
server.use_admin_key("MASTER_KEY").await;
// create index `test`
let index = server.index("test");
@@ -632,6 +446,7 @@ async fn list_authorized_tasks_restricted_index() {
let (response, code) = server.service.get("/tasks").await;
assert_eq!(200, code, "{:?}", &response);
println!("{}", response);
let response = response["results"].as_array().unwrap();
// key should have access on `products` index.
assert!(response.iter().any(|task| task["indexUid"] == "products"));
@@ -643,7 +458,7 @@ async fn list_authorized_tasks_restricted_index() {
#[actix_rt::test]
async fn list_authorized_tasks_no_index_restriction() {
let mut server = Server::new_auth().await;
server.use_admin_key(MASTER_KEY).await;
server.use_admin_key("MASTER_KEY").await;
// create index `test`
let index = server.index("test");
@@ -684,7 +499,7 @@ async fn list_authorized_tasks_no_index_restriction() {
#[actix_rt::test]
async fn error_creating_index_without_action() {
let mut server = Server::new_auth().await;
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
// create key with access on all indexes.
let content = json!({
@@ -772,7 +587,7 @@ async fn lazy_create_index() {
];
for content in contents {
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
@@ -828,114 +643,14 @@ async fn lazy_create_index() {
}
}
#[actix_rt::test]
async fn lazy_create_index_from_pattern() {
let mut server = Server::new_auth().await;
// create key with access on all indexes.
let contents = vec![
json!({
"indexes": ["products_*"],
"actions": ["*"],
"expiresAt": "2050-11-13T00:00:00Z"
}),
json!({
"indexes": ["products_*"],
"actions": ["indexes.*", "documents.*", "settings.*", "tasks.*"],
"expiresAt": "2050-11-13T00:00:00Z"
}),
json!({
"indexes": ["products_*"],
"actions": ["indexes.create", "documents.add", "settings.update", "tasks.get"],
"expiresAt": "2050-11-13T00:00:00Z"
}),
];
for content in contents {
server.use_api_key(MASTER_KEY);
let (response, code) = server.add_api_key(content).await;
assert_eq!(201, code, "{:?}", &response);
assert!(response["key"].is_string());
// use created key.
let key = response["key"].as_str().unwrap();
server.use_api_key(key);
// try to create an index via the add documents route
let index = server.index("products_1");
let test = server.index("test");
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
// Fail to create test index
let (response, code) = test.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via the add settings route
let index = server.index("products_2");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
// Fail to create test index
let index = server.index("test");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via the add specialized settings route
let index = server.index("products_3");
let (response, code) = index.update_distinct_attribute(json!("test")).await;
assert_eq!(202, code, "{:?}", &response);
let task_id = response["taskUid"].as_u64().unwrap();
index.wait_task(task_id).await;
let (response, code) = index.get_task(task_id).await;
assert_eq!(200, code, "{:?}", &response);
assert_eq!(response["status"], "succeeded");
// Fail to create test index
let index = server.index("test");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(403, code, "{:?}", &response);
}
}
#[actix_rt::test]
async fn error_creating_index_without_index() {
let mut server = Server::new_auth().await;
server.use_api_key(MASTER_KEY);
server.use_api_key("MASTER_KEY");
// create key with access on all indexes.
let content = json!({
"indexes": ["unexpected","products_*"],
"indexes": ["unexpected"],
"actions": ["*"],
"expiresAt": "2050-11-13T00:00:00Z"
});
@@ -975,32 +690,4 @@ async fn error_creating_index_without_index() {
let index = server.index("test3");
let (response, code) = index.create(None).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via the add documents route
let index = server.index("products");
let documents = json!([
{
"id": 1,
"content": "foo",
}
]);
let (response, code) = index.add_documents(documents, None).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via the add settings route
let index = server.index("products");
let settings = json!({ "distinctAttribute": "test"});
let (response, code) = index.update_settings(settings).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via the add specialized settings route
let index = server.index("products");
let (response, code) = index.update_distinct_attribute(json!("test")).await;
assert_eq!(403, code, "{:?}", &response);
// try to create an index via the create index route
let index = server.index("products");
let (response, code) = index.create(None).await;
assert_eq!(403, code, "{:?}", &response);
}

View File

@@ -120,7 +120,7 @@ async fn create_api_key_bad_indexes() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value at `.indexes[0]`: `good doggo` is not a valid index uid pattern. Index uid patterns can be an integer or a string containing only alphanumeric characters, hyphens (-), underscores (_), and optionally end with a star (*).",
"message": "Invalid value at `.indexes[0]`: `good doggo` is not a valid index uid. Index uid can be an integer or a string containing only alphanumeric characters, hyphens (-) and underscores (_).",
"code": "invalid_api_key_indexes",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_api_key_indexes"
@@ -138,7 +138,7 @@ async fn create_api_key_bad_expires_at() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown field `expires_at`: did you mean `expiresAt`? expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"message": "Unknown field `expires_at`: expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
@@ -150,7 +150,7 @@ async fn create_api_key_bad_expires_at() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Unknown field `expires_at`: did you mean `expiresAt`? expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"message": "Unknown field `expires_at`: expected one of `description`, `name`, `uid`, `actions`, `indexes`, `expiresAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@@ -82,11 +82,6 @@ static ACCEPTED_KEYS: Lazy<Vec<Value>> = Lazy::new(|| {
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["sal*", "prod*"],
"actions": ["search"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
]
});
@@ -109,11 +104,6 @@ static REFUSED_KEYS: Lazy<Vec<Value>> = Lazy::new(|| {
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["prod*", "p*"],
"actions": ["*"],
"expiresAt": (OffsetDateTime::now_utc() + Duration::days(1)).format(&Rfc3339).unwrap()
}),
json!({
"indexes": ["products"],
"actions": ["search"],
@@ -255,10 +245,6 @@ async fn search_authorized_simple_token() {
"searchRules" => json!(["sales"]),
"exp" => Value::Null
},
hashmap! {
"searchRules" => json!(["sa*"]),
"exp" => Value::Null
},
];
compute_authorized_search!(tenant_tokens, {}, 5);
@@ -365,19 +351,11 @@ async fn filter_search_authorized_filter_token() {
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!({
"*": {},
"sal*": {"filter": ["color = blue"]}
}),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
compute_authorized_search!(tenant_tokens, "color = yellow", 1);
}
/// Tests that these tenant tokens are incompatible with the REFUSED_KEYS defined above.
#[actix_rt::test]
async fn error_search_token_forbidden_parent_key() {
let tenant_tokens = vec![
@@ -405,10 +383,6 @@ async fn error_search_token_forbidden_parent_key() {
"searchRules" => json!(["sales"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
hashmap! {
"searchRules" => json!(["sali*", "s*", "sales*"]),
"exp" => json!((OffsetDateTime::now_utc() + Duration::hours(1)).unix_timestamp())
},
];
compute_forbidden_search!(tenant_tokens, REFUSED_KEYS);

View File

@@ -30,7 +30,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.json"),
vec![("content-type", "application/json")],
("content-type", "application/json"),
)
.await;
assert_eq!(code, 202);
@@ -46,7 +46,7 @@ impl Index<'_> {
.post_str(
url,
include_str!("../assets/test_set.ndjson"),
vec![("content-type", "application/x-ndjson")],
("content-type", "application/x-ndjson"),
)
.await;
assert_eq!(code, 202);
@@ -96,21 +96,6 @@ impl Index<'_> {
self.service.post_encoded(url, documents, self.encoder).await
}
pub async fn raw_add_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.post_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.post_str(url, payload, Vec::new()).await
}
}
pub async fn update_documents(
&self,
documents: Value,
@@ -125,21 +110,6 @@ impl Index<'_> {
self.service.put_encoded(url, documents, self.encoder).await
}
pub async fn raw_update_documents(
&self,
payload: &str,
content_type: Option<&str>,
query_parameter: &str,
) -> (Value, StatusCode) {
let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), query_parameter);
if let Some(content_type) = content_type {
self.service.put_str(url, payload, vec![("Content-Type", content_type)]).await
} else {
self.service.put_str(url, payload, Vec::new()).await
}
}
pub async fn wait_task(&self, update_id: u64) -> Value {
// try several times to get status, or panic to not wait forever
let url = format!("/tasks/{}", update_id);

View File

@@ -34,18 +34,17 @@ impl Service {
self.request(req).await
}
/// Send a test post request from a text body.
/// Send a test post request from a text body, with a `content-type:application/json` header.
pub async fn post_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
header: (&str, &str),
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::post().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
let req = test::TestRequest::post()
.uri(url.as_ref())
.set_payload(body.as_ref().to_string())
.insert_header(header);
self.request(req).await
}
@@ -58,21 +57,6 @@ impl Service {
self.put_encoded(url, body, Encoder::Plain).await
}
/// Send a test put request from a text body.
pub async fn put_str(
&self,
url: impl AsRef<str>,
body: impl AsRef<str>,
headers: Vec<(&str, &str)>,
) -> (Value, StatusCode) {
let mut req =
test::TestRequest::put().uri(url.as_ref()).set_payload(body.as_ref().to_string());
for header in headers {
req = req.insert_header(header);
}
self.request(req).await
}
pub async fn put_encoded(
&self,
url: impl AsRef<str>,

View File

@@ -216,133 +216,6 @@ async fn add_single_document_with_every_encoding() {
}
}
#[actix_rt::test]
async fn add_csv_document() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id,name,race
0,jean,bernese mountain
1,jorts,orange cat";
let (response, code) = index.raw_update_documents(document, Some("text/csv"), "").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
#[actix_rt::test]
async fn add_csv_document_with_custom_delimiter() {
let server = Server::new().await;
let index = server.index("pets");
let document = "#id|name|race
0|jean|bernese mountain
1|jorts|orange cat";
let (response, code) =
index.raw_update_documents(document, Some("text/csv"), "?csvDelimiter=|").await;
snapshot!(code, @"202 Accepted");
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
{
"taskUid": 0,
"indexUid": "pets",
"status": "enqueued",
"type": "documentAdditionOrUpdate",
"enqueuedAt": "[date]"
}
"###);
let response = index.wait_task(response["taskUid"].as_u64().unwrap()).await;
snapshot!(json_string!(response, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]" }), @r###"
{
"uid": 0,
"indexUid": "pets",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 2,
"indexedDocuments": 2
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, code) = index.get_all_documents(GetAllDocumentsOptions::default()).await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(documents), @r###"
{
"results": [
{
"#id": "0",
"name": "jean",
"race": "bernese mountain"
},
{
"#id": "1",
"name": "jorts",
"race": "orange cat"
}
],
"offset": 0,
"limit": 20,
"total": 2
}
"###);
}
/// any other content-type must be refused
#[actix_rt::test]
async fn error_add_documents_test_bad_content_types() {
@@ -1154,53 +1027,6 @@ async fn error_document_field_limit_reached() {
@"");
}
#[actix_rt::test]
async fn add_documents_with_geo_field() {
let server = Server::new().await;
let index = server.index("doggo");
index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let documents = json!([
{
"id": "1",
},
{
"id": "2",
"_geo": null,
},
{
"id": "3",
"_geo": { "lat": 1, "lng": 1 },
},
{
"id": "4",
"_geo": { "lat": "1", "lng": "1" },
},
]);
index.add_documents(documents, None).await;
let response = index.wait_task(1).await;
snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }),
@r###"
{
"uid": 1,
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 4,
"indexedDocuments": 4
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
}
#[actix_rt::test]
async fn add_documents_invalid_geo_field() {
let server = Server::new().await;

View File

@@ -1,6 +1,5 @@
use meili_snap::*;
use serde_json::json;
use urlencoding::encode;
use crate::common::Server;
@@ -98,323 +97,3 @@ async fn delete_documents_batch() {
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_add_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_payload() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("application/json"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A json payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("application/x-ndjson"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A ndjson payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
let (response, code) = index.raw_update_documents("", Some("text/csv"), "").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "A csv payload is missing.",
"code": "missing_payload",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_payload"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified, this error is triggered first
let (response, code) = index.raw_add_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_missing_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", None, "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
// even with a csv delimiter specified, this error is triggered first
let (response, code) = index.raw_update_documents("", None, "?csvDelimiter=;").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "A Content-Type header is missing. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "missing_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_add_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index.raw_update_documents("", Some("doggo"), "").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `doggo` is invalid. Accepted values for the Content-Type header are: `application/json`, `application/x-ndjson`, `text/csv`",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_add_documents("", Some("application/json"), &format!("?csvDelimiter={}", encode("🍰")))
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn update_documents_bad_csv_delimiter() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found an empty string",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid value in parameter `csvDelimiter`: expected a string of one character, but found the following string of 5 characters: `doggo`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
let (response, code) = index
.raw_update_documents(
"",
Some("application/json"),
&format!("?csvDelimiter={}", encode("🍰")),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "csv delimiter must be an ascii character. Found: `🍰`",
"code": "invalid_index_csv_delimiter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_index_csv_delimiter"
}
"###);
}
#[actix_rt::test]
async fn replace_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_add_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_add_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}
#[actix_rt::test]
async fn update_documents_csv_delimiter_with_bad_content_type() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) =
index.raw_update_documents("", Some("application/json"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/json` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
let (response, code) =
index.raw_update_documents("", Some("application/x-ndjson"), "?csvDelimiter=a").await;
snapshot!(code, @"415 Unsupported Media Type");
snapshot!(json_string!(response), @r###"
{
"message": "The Content-Type `application/x-ndjson` does not support the use of a csv delimiter. The csv delimiter can only be used with the Content-Type `text/csv`.",
"code": "invalid_content_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_content_type"
}
"###);
}

View File

@@ -1,15 +1,8 @@
[package]
name = "milli"
version = "1.0.0"
authors = ["Kerollmops <clement@meilisearch.com>"]
edition = "2018"
publish = false
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
# edition.workspace = true
license.workspace = true
[dependencies]
bimap = { version = "0.6.2", features = ["serde"] }
@@ -19,7 +12,7 @@ byteorder = "1.4.3"
charabia = { version = "0.7.0", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.6"
deserr = "0.5.0"
deserr = "0.3.0"
either = "1.8.0"
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"

View File

@@ -7,31 +7,45 @@ use serde::{Deserialize, Serialize};
use thiserror::Error;
use crate::error::is_reserved_keyword;
use crate::search::facet::BadGeoError;
use crate::{CriterionError, Error, UserError};
/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
#[derive(Error, Debug)]
#[derive(Debug)]
pub enum AscDescError {
#[error(transparent)]
GeoError(BadGeoError),
#[error("Invalid syntax for the asc/desc parameter: expected expression ending by `:asc` or `:desc`, found `{name}`.")]
InvalidLatitude,
InvalidLongitude,
InvalidSyntax { name: String },
#[error("`{name}` is a reserved keyword and thus can't be used as a asc/desc rule.")]
ReservedKeyword { name: String },
}
impl From<BadGeoError> for AscDescError {
fn from(geo_error: BadGeoError) -> Self {
AscDescError::GeoError(geo_error)
impl fmt::Display for AscDescError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::InvalidLatitude => {
write!(f, "Latitude must be contained between -90 and 90 degrees.",)
}
Self::InvalidLongitude => {
write!(f, "Longitude must be contained between -180 and 180 degrees.",)
}
Self::InvalidSyntax { name } => {
write!(f, "Invalid syntax for the asc/desc parameter: expected expression ending by `:asc` or `:desc`, found `{}`.", name)
}
Self::ReservedKeyword { name } => {
write!(
f,
"`{}` is a reserved keyword and thus can't be used as a asc/desc rule.",
name
)
}
}
}
}
impl From<AscDescError> for CriterionError {
fn from(error: AscDescError) -> Self {
match error {
AscDescError::GeoError(_) => {
AscDescError::InvalidLatitude | AscDescError::InvalidLongitude => {
CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
}
AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name },
@@ -71,9 +85,9 @@ impl FromStr for Member {
.map_err(|_| AscDescError::ReservedKeyword { name: text.to_string() })
})?;
if !(-90.0..=90.0).contains(&lat) {
return Err(BadGeoError::Lat(lat))?;
return Err(AscDescError::InvalidLatitude)?;
} else if !(-180.0..=180.0).contains(&lng) {
return Err(BadGeoError::Lng(lng))?;
return Err(AscDescError::InvalidLongitude)?;
}
Ok(Member::Geo([lat, lng]))
}
@@ -148,8 +162,10 @@ impl FromStr for AscDesc {
#[derive(Error, Debug)]
pub enum SortError {
#[error(transparent)]
ParseGeoError { error: BadGeoError },
#[error("{}", AscDescError::InvalidLatitude)]
InvalidLatitude,
#[error("{}", AscDescError::InvalidLongitude)]
InvalidLongitude,
#[error("Invalid syntax for the geo parameter: expected expression formated like \
`_geoPoint(latitude, longitude)` and ending by `:asc` or `:desc`, found `{name}`.")]
BadGeoPointUsage { name: String },
@@ -168,7 +184,8 @@ pub enum SortError {
impl From<AscDescError> for SortError {
fn from(error: AscDescError) -> Self {
match error {
AscDescError::GeoError(error) => SortError::ParseGeoError { error },
AscDescError::InvalidLatitude => SortError::InvalidLatitude,
AscDescError::InvalidLongitude => SortError::InvalidLongitude,
AscDescError::InvalidSyntax { name } => SortError::InvalidName { name },
AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
SortError::BadGeoPointUsage { name }
@@ -260,11 +277,11 @@ mod tests {
),
("_geoPoint(35, 85, 75):asc", ReservedKeyword { name: S("_geoPoint(35, 85, 75)") }),
("_geoPoint(18):asc", ReservedKeyword { name: S("_geoPoint(18)") }),
("_geoPoint(200, 200):asc", GeoError(BadGeoError::Lat(200.))),
("_geoPoint(90.000001, 0):asc", GeoError(BadGeoError::Lat(90.000001))),
("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))),
("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))),
("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))),
("_geoPoint(200, 200):asc", InvalidLatitude),
("_geoPoint(90.000001, 0):asc", InvalidLatitude),
("_geoPoint(0, -180.000001):desc", InvalidLongitude),
("_geoPoint(159.256, 130):asc", InvalidLatitude),
("_geoPoint(12, -2021):desc", InvalidLongitude),
];
for (req, expected_error) in invalid_req {

View File

@@ -1,5 +1,6 @@
use std::mem::take;
use heed::BytesDecode;
use itertools::Itertools;
use log::debug;
use ordered_float::OrderedFloat;
@@ -7,7 +8,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
@@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>(
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
}
fn facet_extreme_value<'t>(
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
) -> Result<Option<f64>> {
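// Both facet sort iterators yield level-0 groups in order, so the first entry's
// `left_bound` is already the extreme (minimum or maximum) value for the given
// candidates; the rest of the iterator never needs to be consumed.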
let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?;
Ok(OrderedF64Codec::bytes_decode(extreme_value))
}
pub fn facet_min_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
pub fn facet_max_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
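// As a small usage sketch (with a hypothetical `fid` and candidate set), a caller
// wanting the numeric range of a facet could combine the two helpers:
//
//     let min = facet_min_value(index, rtxn, fid, candidates.clone())?;
//     let max = facet_max_value(index, rtxn, fid, candidates)?;
//
// Both only look at the `facet_id_f64_docids` database, so a field with no numeric
// value among the candidates yields `None` and can simply be skipped.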
fn facet_ordered_set_based<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
@@ -203,23 +236,24 @@ fn facet_ordered_set_based<'t>(
is_ascending: bool,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>> {
let make_iter = if is_ascending { ascending_facet_sort } else { descending_facet_sort };
let number_db =
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let string_db =
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let number_iter = make_iter(
rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates.clone(),
)?;
let (number_iter, string_iter) = if is_ascending {
let number_iter = ascending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = ascending_facet_sort(rtxn, string_db, field_id, candidates)?;
let string_iter = make_iter(
rtxn,
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
)?;
(itertools::Either::Left(number_iter), itertools::Either::Left(string_iter))
} else {
let number_iter = descending_facet_sort(rtxn, number_db, field_id, candidates.clone())?;
let string_iter = descending_facet_sort(rtxn, string_db, field_id, candidates)?;
Ok(Box::new(number_iter.chain(string_iter)))
(itertools::Either::Right(number_iter), itertools::Either::Right(string_iter))
};
Ok(Box::new(number_iter.chain(string_iter).map(|res| res.map(|(doc_ids, _)| doc_ids))))
}
/// Returns an iterator over groups of the given candidates in ascending or descending order.

View File

@@ -123,7 +123,7 @@ impl<'t> Criterion for Attribute<'t> {
None => {
return Ok(Some(CriterionResult {
query_tree: Some(query_tree),
candidates: Some(allowed_candidates),
candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
initial_candidates: Some(self.initial_candidates.take()),
}));

View File

@@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
mod asc_desc;
pub use asc_desc::{facet_max_value, facet_min_value};
mod attribute;
mod exactness;
pub mod r#final;

View File

@@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> {
}
}
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let candidates = if let Some(candidates) = self.candidates.clone() {
candidates
} else {
return Ok(Default::default());
};
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.clone()
}
}
None => filterable_fields,
};
let mut distribution = BTreeMap::new();
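// For every faceted field, keep the (min, max) pair only when both bounds exist,
// i.e. when the field has at least one numeric value among the candidates.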
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
min_value
} else {
continue;
};
let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
max_value
} else {
continue;
};
distribution.insert(name.to_string(), (min_value, max_value));
}
}
Ok(distribution)
}
pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
@@ -537,4 +596,216 @@ mod tests {
milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
}
#[test]
fn facet_stats() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": facet_values[i % 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
#[test]
fn facet_stats_array() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###);
}
#[test]
fn facet_stats_mixed_array() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
#[test]
fn facet_mixed_values() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).into_iter().collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = if i % 2 == 0 {
serde_json::json!({
"colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
})
} else {
serde_json::json!({
"colour": format!("{}", facet_values[i % 1000] + 10000),
})
};
let document = document.as_object().unwrap().clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((0..1000).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(std::iter::once("colour"))
.candidates((217..777).into_iter().collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###);
}
}

View File

@@ -34,15 +34,20 @@ pub fn ascending_facet_sort<'t>(
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
Ok(itertools::Either::Left(AscendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter)],
}))
} else {
Ok(Box::new(std::iter::empty()))
Ok(itertools::Either::Right(std::iter::empty()))
}
}
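// The `itertools::Either` return type keeps the iterator un-boxed, and each item
// carries the raw `left_bound` bytes alongside the docids so that callers such as
// `facet_min_value` can decode the level-0 facet value.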
@@ -60,7 +65,7 @@ struct AscendingFacetSort<'t, 'e> {
}
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
type Item = Result<RoaringBitmap>;
type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
@@ -90,7 +95,8 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
*documents_ids -= &bitmap;
if level == 0 {
return Some(Ok(bitmap));
// Since the level is 0, the left_bound is the exact value.
return Some(Ok((bitmap, left_bound)));
}
let starting_key_below =
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
@@ -130,7 +136,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -152,7 +158,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -161,7 +167,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -183,7 +189,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -192,7 +198,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -214,7 +220,7 @@ mod tests {
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}

View File

@@ -17,21 +17,21 @@ pub fn descending_facet_sort<'t>(
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(Box::new(DescendingFacetSort {
Ok(itertools::Either::Left(DescendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter, Bound::Included(last_bound))],
}))
} else {
Ok(Box::new(std::iter::empty()))
Ok(itertools::Either::Right(std::iter::empty()))
}
}
@@ -50,7 +50,7 @@ struct DescendingFacetSort<'t> {
}
impl<'t> Iterator for DescendingFacetSort<'t> {
type Item = Result<RoaringBitmap>;
type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
@@ -77,7 +77,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*documents_ids -= &bitmap;
if level == 0 {
return Some(Ok(bitmap));
// Since we're at level 0, the left_bound is the exact value.
return Some(Ok((bitmap, left_bound)));
}
let starting_key_below =
FacetGroupKey { field_id, level: level - 1, left_bound };
@@ -146,7 +147,7 @@ mod tests {
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -169,7 +170,7 @@ mod tests {
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -179,7 +180,7 @@ mod tests {
let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -200,7 +201,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -209,7 +210,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
@@ -231,7 +232,7 @@ mod tests {
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let docids = el.unwrap();
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}

View File

@@ -21,51 +21,18 @@ pub struct Filter<'a> {
condition: FilterCondition<'a>,
}
#[derive(Debug)]
pub enum BadGeoError {
Lat(f64),
Lng(f64),
BoundingBoxTopIsBelowBottom(f64, f64),
}
impl std::error::Error for BadGeoError {}
impl Display for BadGeoError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
}
Self::Lat(lat) => write!(
f,
"Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ",
lat
),
Self::Lng(lng) => write!(
f,
"Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ",
lng
),
}
}
}
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
ParseGeoError(BadGeoError),
ReservedGeo(&'a str),
BadGeo(&'a str),
BadGeoLat(f64),
BadGeoLng(f64),
BadGeoBoundingBoxTopIsBelowBottom(f64, f64),
Reserved(&'a str),
TooDeep,
}
impl<'a> std::error::Error for FilterError<'a> {}
impl<'a> From<BadGeoError> for FilterError<'a> {
fn from(geo_error: BadGeoError) -> Self {
FilterError::ParseGeoError(geo_error)
}
}
impl<'a> Display for FilterError<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@@ -77,11 +44,7 @@ impl<'a> Display for FilterError<'a> {
attribute,
)
} else {
let filterables_list = filterable_fields
.iter()
.map(AsRef::as_ref)
.collect::<Vec<&str>>()
.join(" ");
let filterables_list = filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(" ");
write!(
f,
@@ -90,19 +53,20 @@ impl<'a> Display for FilterError<'a> {
filterables_list,
)
}
}
Self::TooDeep => write!(
f,
},
Self::TooDeep => write!(f,
"Too many filter conditions, can't process more than {} filters.",
MAX_FILTER_DEPTH
),
Self::ReservedGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
Self::Reserved(keyword) => write!(
f,
"`{}` is a reserved keyword and thus can't be used as a filter expression.",
keyword
),
Self::ParseGeoError(error) => write!(f, "{}", error),
Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
Self::BadGeoBoundingBoxTopIsBelowBottom(top, bottom) => write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`."),
Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat),
Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng),
}
}
}
@@ -334,10 +298,10 @@ impl<'a> Filter<'a> {
} else {
match fid.value() {
attribute @ "_geo" => {
Err(fid.as_external_error(FilterError::ReservedGeo(attribute)))?
Err(fid.as_external_error(FilterError::BadGeo(attribute)))?
}
attribute if attribute.starts_with("_geoPoint(") => {
Err(fid.as_external_error(FilterError::ReservedGeo("_geoPoint")))?
Err(fid.as_external_error(FilterError::BadGeo("_geoPoint")))?
}
attribute @ "_geoDistance" => {
Err(fid.as_external_error(FilterError::Reserved(attribute)))?
@@ -389,10 +353,14 @@ impl<'a> Filter<'a> {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
return Err(
point[0].as_external_error(FilterError::BadGeoLat(base_point[0]))
)?;
}
if !(-180.0..=180.0).contains(&base_point[1]) {
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
return Err(
point[1].as_external_error(FilterError::BadGeoLng(base_point[1]))
)?;
}
let radius = radius.parse_finite_float()?;
let rtree = match index.geo_rtree(rtxn)? {
@@ -430,26 +398,27 @@ impl<'a> Filter<'a> {
bottom_right_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_left[0]) {
return Err(
top_left_point[0].as_external_error(BadGeoError::Lat(top_left[0]))
)?;
return Err(top_left_point[0]
.as_external_error(FilterError::BadGeoLat(top_left[0])))?;
}
if !(-180.0..=180.0).contains(&top_left[1]) {
return Err(
top_left_point[1].as_external_error(BadGeoError::Lng(top_left[1]))
)?;
return Err(top_left_point[1]
.as_external_error(FilterError::BadGeoLng(top_left[1])))?;
}
if !(-90.0..=90.0).contains(&bottom_right[0]) {
return Err(bottom_right_point[0]
.as_external_error(BadGeoError::Lat(bottom_right[0])))?;
.as_external_error(FilterError::BadGeoLat(bottom_right[0])))?;
}
if !(-180.0..=180.0).contains(&bottom_right[1]) {
return Err(bottom_right_point[1]
.as_external_error(BadGeoError::Lng(bottom_right[1])))?;
.as_external_error(FilterError::BadGeoLng(bottom_right[1])))?;
}
if top_left[0] < bottom_right[0] {
return Err(bottom_right_point[1].as_external_error(
BadGeoError::BoundingBoxTopIsBelowBottom(top_left[0], bottom_right[0]),
FilterError::BadGeoBoundingBoxTopIsBelowBottom(
top_left[0],
bottom_right[0],
),
))?;
}

View File

@@ -4,7 +4,7 @@ use heed::types::{ByteSlice, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
pub use self::filter::Filter;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
mod facet_distribution;

View File

@@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
#[track_caller]
pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@@ -427,26 +427,8 @@ pub fn snap_settings(index: &Index) -> String {
snap
}
pub fn snap_documents(index: &Index) -> String {
let mut snap = String::new();
let rtxn = index.read_txn().unwrap();
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
let display = fields_ids_map.ids().collect::<Vec<_>>();
for document in index.all_documents(&rtxn).unwrap() {
let doc = obkv_to_json(&display, &fields_ids_map, document.unwrap().1).unwrap();
snap.push_str(&serde_json::to_string(&doc).unwrap());
snap.push('\n');
}
snap
}
#[macro_export]
macro_rules! full_snap_of_db {
($index:ident, documents) => {{
$crate::snapshot_tests::snap_documents(&$index)
}};
($index:ident, settings) => {{
$crate::snapshot_tests::snap_settings(&$index)
}};

View File

@@ -395,7 +395,6 @@ pub fn validate_geo_from_json(id: &DocumentId, bytes: &[u8]) -> Result<StdResult
(Some(_), None) => Ok(Err(MissingLongitude { document_id: debug_id() })),
(None, None) => Ok(Err(MissingLatitudeAndLongitude { document_id: debug_id() })),
},
Value::Null => Ok(Ok(())),
value => Ok(Err(NotAnObject { document_id: debug_id(), value })),
}
}

View File

@@ -59,7 +59,6 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: document_id() })?;
}
// else => the _geo object was `null`, there is nothing to do
}
writer_into_reader(writer)

View File

@@ -6,7 +6,6 @@ use roaring::RoaringBitmap;
use super::read_u32_ne_bytes;
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::transform::Operation;
use crate::Result;
pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;
@@ -58,6 +57,21 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
Ok(obkvs.last().unwrap().clone())
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
Ok(obkvs
.iter()
.cloned()
.reduce(|acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current);
let mut buffer = Vec::new();
merge_two_obkvs(first, second, &mut buffer);
Cow::from(buffer)
})
.unwrap())
}
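// For example, merging a base document {1: "a", 2: "b"} with an update {2: "B", 3: "c"}
// produces {1: "a", 2: "B", 3: "c"}: fields present in both obkvs keep the update's
// value, while fields present on only one side are kept as-is.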
pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
use itertools::merge_join_by;
use itertools::EitherOrBoth::{Both, Left, Right};
@@ -74,41 +88,6 @@ pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffe
writer.finish().unwrap();
}
/// Merge all the obkvs in the order we see them.
pub fn merge_obkvs_and_operations<'a>(
_key: &[u8],
obkvs: &[Cow<'a, [u8]>],
) -> Result<Cow<'a, [u8]>> {
// [add, add, delete, add, add]
// we can ignore everything that happened before the last delete.
let starting_position =
obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
// [add, add, delete]
// if the last operation was a deletion then we simply return the deletion
if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
{
return Ok(obkvs[obkvs.len() - 1].clone());
}
let mut buffer = Vec::new();
// (add, add, delete) [add, add]
// in the other case, no deletion will be encountered during the merge
let mut ret =
obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
let first = obkv::KvReader::new(&acc);
let second = obkv::KvReader::new(&current[1..]);
merge_two_obkvs(first, second, &mut buffer);
// we want the result of the merge into our accumulator
std::mem::swap(&mut acc, &mut buffer);
acc
});
ret.insert(0, Operation::Addition as u8);
Ok(Cow::from(ret))
}
pub fn merge_cbo_roaring_bitmaps<'a>(
_key: &[u8],
values: &[Cow<'a, [u8]>],

View File

@@ -13,9 +13,9 @@ pub use grenad_helpers::{
GrenadParameters, MergeableReader,
};
pub use merge_functions::{
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps,
merge_obkvs_and_operations, merge_roaring_bitmaps, merge_two_obkvs,
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
serialize_roaring_bitmap, MergeFn,
};
use crate::MAX_WORD_LENGTH;

View File

@@ -79,7 +79,6 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a, FP, FA> {
progress: FP,
should_abort: FA,
added_documents: u64,
deleted_documents: u64,
}
#[derive(Default, Debug, Clone)]
@@ -123,7 +122,6 @@ where
wtxn,
index,
added_documents: 0,
deleted_documents: 0,
})
}
@@ -168,30 +166,6 @@ where
Ok((self, Ok(indexed_documents)))
}
/// Remove a batch of documents from the current builder.
///
/// Returns the number of documents deleted from the builder.
pub fn remove_documents(
mut self,
to_delete: Vec<String>,
) -> Result<(Self, StdResult<u64, UserError>)> {
// Early return when there is no document to add
if to_delete.is_empty() {
return Ok((self, Ok(0)));
}
let deleted_documents = self
.transform
.as_mut()
.expect("Invalid document deletion state")
.remove_documents(to_delete, self.wtxn, &self.should_abort)?
as u64;
self.deleted_documents += deleted_documents;
Ok((self, Ok(deleted_documents)))
}
#[logging_timer::time("IndexDocuments::{}")]
pub fn execute(mut self) -> Result<DocumentAdditionResult> {
if self.added_documents == 0 {
@@ -1905,328 +1879,4 @@ mod tests {
index.add_documents(doc1).unwrap();
}
#[test]
fn add_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) = builder.remove_documents(vec![S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":3,"name":"jean","age":25}
"###);
}
#[test]
fn add_update_and_delete_documents_in_single_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 5,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn add_document_and_in_another_transform_update_and_delete_documents() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "bob", "age": 20 } },
{ "id": 3, "name": "jean", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 3,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
{"id":2,"doggo":{"name":"bob","age":20}}
{"id":3,"name":"jean","age":25}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 2, "catto": "jorts" },
{ "id": 3, "legs": 4 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"jean","age":25,"legs":4}
"###);
}
#[test]
fn delete_document_and_then_add_documents_in_the_same_transform() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 2,
number_of_documents: 2,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":2,"doggo":{"name":"jean","age":20}}
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn delete_the_same_document_multiple_time() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"0");
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
{ "id": 2, "doggo": { "name": "jean", "age": 20 } },
{ "id": 3, "name": "bob", "age": 25 },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"3");
let (builder, removed) =
builder.remove_documents(vec![S("1"), S("2"), S("1"), S("2")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"2");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 3,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":3,"name":"bob","age":25}
"###);
}
#[test]
fn add_document_and_in_another_transform_delete_the_document_then_add_it_again() {
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let documents = documents!([
{ "id": 1, "doggo": "kevin" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"doggo":"kevin"}
"###);
// A first batch of documents has been inserted
let mut wtxn = index.write_txn().unwrap();
let builder = IndexDocuments::new(
&mut wtxn,
&index,
&index.indexer_config,
index.index_documents_config.clone(),
|_| (),
|| false,
)
.unwrap();
let (builder, removed) = builder.remove_documents(vec![S("1")]).unwrap();
insta::assert_display_snapshot!(removed.unwrap(), @"1");
let documents = documents!([
{ "id": 1, "catto": "jorts" },
]);
let (builder, added) = builder.add_documents(documents).unwrap();
insta::assert_display_snapshot!(added.unwrap(), @"1");
let addition = builder.execute().unwrap();
insta::assert_debug_snapshot!(addition, @r###"
DocumentAdditionResult {
indexed_documents: 1,
number_of_documents: 1,
}
"###);
wtxn.commit().unwrap();
db_snap!(index, documents, @r###"
{"id":1,"catto":"jorts"}
"###);
}
}
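A note on the snapshot values in `delete_the_same_document_multiple_time` above: the second `remove_documents` call reports `2` even though four removals were requested, because duplicate external ids are deduplicated before being counted and only ids that resolve to an existing document count as deleted (this mirrors the `sort_unstable`/`dedup` logic visible in the `remove_documents` hunk further down in this diff). A minimal stand-alone sketch of that counting rule; the function name and the `HashSet` stand-in are illustrative only, not part of the Meilisearch codebase:

use std::collections::HashSet;

// Illustrative only: counts how many requested removals would resolve to an
// existing document, deduplicating the request first.
fn count_removals(mut to_remove: Vec<String>, known_ids: &HashSet<String>) -> usize {
    // There may be duplicates in the documents to remove.
    to_remove.sort_unstable();
    to_remove.dedup();
    to_remove.iter().filter(|id| known_ids.contains(id.as_str())).count()
}

fn main() {
    // Ids "1", "2" and "3" stand in for documents already added in the batch.
    let known_ids: HashSet<String> = ["1", "2", "3"].iter().map(|s| s.to_string()).collect();
    let to_remove: Vec<String> = vec!["1", "2", "1", "2"].into_iter().map(String::from).collect();
    // Matches the `@"2"` snapshot in the test above.
    assert_eq!(count_removals(to_remove, &known_ids), 2);
}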

View File

@@ -12,9 +12,7 @@ use roaring::RoaringBitmap;
use serde_json::Value;
use smartstring::SmartString;
use super::helpers::{
create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
};
use super::helpers::{create_sorter, create_writer, keep_latest_obkv, merge_obkvs, MergeFn};
use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
@@ -52,12 +50,8 @@ pub struct Transform<'a, 'i> {
pub index_documents_method: IndexDocumentsMethod,
available_documents_ids: AvailableDocumentsIds,
// Both grenads follow the same format:
// key | value
// u32 | 1 byte for the Operation byte, the rest is the obkv of the document stored
original_sorter: grenad::Sorter<MergeFn>,
flattened_sorter: grenad::Sorter<MergeFn>,
replaced_documents_ids: RoaringBitmap,
new_documents_ids: RoaringBitmap,
// To increase the cache locality and decrease the heap usage we use compact smartstring.
@@ -65,14 +59,6 @@ pub struct Transform<'a, 'i> {
documents_count: usize,
}
/// This enum is specific to the grenad sorter stored in the transform.
/// It's used as the first byte of the grenads and tells you if the document id was an addition or a deletion.
#[repr(u8)]
pub enum Operation {
Addition,
Deletion,
}
/// Create a mapping between the field ids found in the document batch and the one that were
/// already present in the index.
///
@@ -108,7 +94,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// with the same user id must be merged or fully replaced in the same batch.
let merge_function = match index_documents_method {
IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
IndexDocumentsMethod::UpdateDocuments => merge_obkvs,
};
// We initialize the sorter with the user indexing settings.
@@ -165,7 +151,9 @@ impl<'a, 'i> Transform<'a, 'i> {
FA: Fn() -> bool + Sync,
{
let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;
let primary_key = cursor.primary_key().to_string();
@@ -173,7 +161,6 @@ impl<'a, 'i> Transform<'a, 'i> {
self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;
let mut obkv_buffer = Vec::new();
let mut document_sorter_buffer = Vec::new();
let mut documents_count = 0;
let mut docid_buffer: Vec<u8> = Vec::new();
let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
@@ -225,13 +212,10 @@ impl<'a, 'i> Transform<'a, 'i> {
Entry::Occupied(entry) => *entry.get() as u32,
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a replaced document.
// It'll be deleted later.
// It'll be deleted later. We keep its original docid to insert it in the grenad.
if let Some(docid) = external_documents_ids.get(entry.key()) {
// If it was already in the list of replaced documents it means it was deleted
// by the remove_documents method. We should start as if it never existed.
if self.replaced_documents_ids.insert(docid) {
original_docid = Some(docid);
}
self.replaced_documents_ids.insert(docid);
original_docid = Some(docid);
}
let docid = self
.available_documents_ids
@@ -264,46 +248,26 @@ impl<'a, 'i> Transform<'a, 'i> {
skip_insertion = true;
} else {
// we associate the base document with the new key, everything will get merged later.
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(base_obkv);
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter.insert(docid.to_be_bytes(), base_obkv)?;
match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
Some(flattened_obkv) => {
// we recreate our buffer with the flattened documents
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
Some(buffer) => {
self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
None => self.flattened_sorter.insert(docid.to_be_bytes(), base_obkv)?,
}
}
}
if !skip_insertion {
self.new_documents_ids.insert(docid);
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&obkv_buffer);
// We use the extracted/generated user id as the key for this document.
self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
self.original_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?;
match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
Some(flattened_obkv) => {
document_sorter_buffer.clear();
document_sorter_buffer.push(Operation::Addition as u8);
document_sorter_buffer.extend_from_slice(&flattened_obkv);
self.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?
Some(buffer) => self.flattened_sorter.insert(docid.to_be_bytes(), &buffer)?,
None => {
self.flattened_sorter.insert(docid.to_be_bytes(), obkv_buffer.clone())?
}
None => self
.flattened_sorter
.insert(docid.to_be_bytes(), &document_sorter_buffer)?,
}
}
documents_count += 1;
@@ -329,73 +293,6 @@ impl<'a, 'i> Transform<'a, 'i> {
Ok(documents_count)
}
/// The counterpart of `read_documents` that removes documents either from the transform or the database.
/// It can be called before, after, or in between two calls to `read_documents`.
///
/// It needs to update all the internal data structures in the transform.
/// - If the document is coming from the database -> it's marked as a to_delete document
/// - If the document to remove was inserted by the `read_documents` method before AND was present in the db,
/// it's marked as `to_delete` + added into the grenad to ensure we don't reinsert it.
/// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
/// it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
/// - If the document to remove was not present in either the db or the transform we do nothing.
pub fn remove_documents<FA>(
&mut self,
mut to_remove: Vec<String>,
wtxn: &mut heed::RwTxn,
should_abort: FA,
) -> Result<usize>
where
FA: Fn() -> bool + Sync,
{
// there may be duplicates in the documents to remove.
to_remove.sort_unstable();
to_remove.dedup();
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
let mut documents_deleted = 0;
for to_remove in to_remove {
if should_abort() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
// if the document was added in a previous iteration of the transform we mark it as deleted in the sorters.
Entry::Occupied(entry) => {
let doc_id = *entry.get() as u32;
self.original_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
self.flattened_sorter
.insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
// we must NOT update the list of replaced_documents_ids
// Either:
// 1. It's already in it and there is nothing to do
// 2. It wasn't in it because the document was created by a previous batch and since
// we're removing it there is nothing to do.
self.new_documents_ids.remove(doc_id);
entry.remove_entry();
}
Entry::Vacant(entry) => {
// If the document was already in the db we mark it as a `to_delete` document.
// It'll be deleted later. We don't need to push anything to the sorters.
if let Some(docid) = external_documents_ids.get(entry.key()) {
self.replaced_documents_ids.insert(docid);
} else {
// if the document is nowhere to be found, there is nothing to do and we must NOT
// increment the count of documents_deleted
continue;
}
}
};
documents_deleted += 1;
}
Ok(documents_deleted)
}
// Flatten a document from the fields ids map contained in self and insert the new
// created fields. Returns `None` if the document doesn't need to be flattened.
fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
@@ -590,11 +487,6 @@ impl<'a, 'i> Transform<'a, 'i> {
let mut documents_count = 0;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
// send a callback to show at which step we are
documents_count += 1;
progress_callback(UpdateIndexingStep::ComputeIdsAndMergeDocuments {
@@ -626,18 +518,9 @@ impl<'a, 'i> Transform<'a, 'i> {
self.indexer_settings.chunk_compression_level,
tempfile::tempfile()?,
);
// Once we have written all the documents into the final sorter, we write the nested documents
// into this writer.
// We get rid of the `Operation` byte and skip the deleted documents as well.
let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
while let Some((key, val)) = iter.next()? {
if val[0] == Operation::Deletion as u8 {
continue;
}
let val = &val[1..];
writer.insert(key, val)?;
}
// Once we have written all the documents into the final sorter, we write the documents
// into this writer, extract the file and reset the seek to be able to read it again.
self.flattened_sorter.write_into_stream_writer(&mut writer)?;
let mut flattened_documents = writer.into_inner()?;
flattened_documents.rewind()?;
@@ -818,45 +701,3 @@ impl TransformOutput {
.collect())
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn merge_obkvs() {
let mut doc_0 = Vec::new();
let mut kv_writer = KvWriter::new(&mut doc_0);
kv_writer.insert(0_u8, [0]).unwrap();
kv_writer.finish().unwrap();
doc_0.insert(0, Operation::Addition as u8);
let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
)
.unwrap();
assert_eq!(*ret, doc_0);
let ret = merge_obkvs_and_operations(
&[],
&[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
)
.unwrap();
assert_eq!(*ret, [Operation::Deletion as u8]);
let ret = merge_obkvs_and_operations(
&[],
&[
Cow::from([Operation::Addition as u8, 1].as_slice()),
Cow::from([Operation::Deletion as u8].as_slice()),
Cow::from(doc_0.as_slice()),
],
)
.unwrap();
assert_eq!(*ret, doc_0);
}
}
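Several hunks in this file revolve around the tagged sorter value described in the struct comment near the top of the diff: each value is a single `Operation` byte followed by the document's obkv bytes, and deletions are skipped when the sorters are drained. A minimal, self-contained sketch of that layout; the helper names are hypothetical and not Meilisearch APIs:

// Illustrative sketch of the tagged-value layout only; not the actual
// Meilisearch implementation.
#[repr(u8)]
enum Operation {
    Addition = 0,
    Deletion = 1,
}

/// Prefix an obkv payload with its operation tag.
fn encode_tagged(op: Operation, obkv: &[u8]) -> Vec<u8> {
    let mut value = Vec::with_capacity(1 + obkv.len());
    value.push(op as u8);
    value.extend_from_slice(obkv);
    value
}

/// Return the obkv payload of an addition, or `None` for a deletion,
/// mirroring the "skip deleted documents" loops in the hunks above.
fn decode_tagged(value: &[u8]) -> Option<&[u8]> {
    match value.split_first() {
        Some((&tag, obkv)) if tag == Operation::Addition as u8 => Some(obkv),
        _ => None,
    }
}

fn main() {
    let tagged = encode_tagged(Operation::Addition, b"obkv bytes");
    assert_eq!(decode_tagged(&tagged), Some(&b"obkv bytes"[..]));
    let deleted = encode_tagged(Operation::Deletion, &[]);
    assert_eq!(decode_tagged(&deleted), None);
}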

View File

@@ -2,7 +2,7 @@ use std::collections::{BTreeSet, HashMap, HashSet};
use std::result::Result as StdResult;
use charabia::{Tokenizer, TokenizerBuilder};
use deserr::{DeserializeError, Deserr};
use deserr::{DeserializeError, DeserializeFromValue};
use itertools::Itertools;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
@@ -23,9 +23,9 @@ pub enum Setting<T> {
NotSet,
}
impl<T, E> Deserr<E> for Setting<T>
impl<T, E> DeserializeFromValue<E> for Setting<T>
where
T: Deserr<E>,
T: DeserializeFromValue<E>,
E: DeserializeError,
{
fn deserialize_from_value<V: deserr::IntoValue>(

View File

@@ -1,16 +1,9 @@
[package]
name = "permissive-json-pointer"
version = "1.0.0"
edition = "2021"
description = "A permissive json pointer"
readme = "README.md"
publish = false
version.workspace = true
authors.workspace = true
# description.workspace = true
homepage.workspace = true
# readme.workspace = true
edition.workspace = true
license.workspace = true
[dependencies]
serde_json = "1.0"